Posted to user@spark.apache.org by Manoj Samel <ma...@gmail.com> on 2014/03/30 19:16:44 UTC

SparkSQL "where" with BigDecimal type gives stacktrace

Hi,

If I do a where on a BigDecimal column, I get a stack trace. Changing
BigDecimal to Double works (a minimal sketch of the working variant follows
the trace below) ...
....
scala> case class JournalLine(account: String, credit: BigDecimal, debit: BigDecimal, date: String, company: String, currency: String, costcenter: String, region: String)
defined class JournalLine
...
scala> jl.where('credit > 0).foreach(println)
scala.MatchError: scala.BigDecimal (of class scala.reflect.internal.Types$TypeRef$$anon$3)
at org.apache.spark.sql.catalyst.ScalaReflection$.schemaFor(ScalaReflection.scala:41)
at org.apache.spark.sql.catalyst.ScalaReflection$$anonfun$schemaFor$1.apply(ScalaReflection.scala:45)
at org.apache.spark.sql.catalyst.ScalaReflection$$anonfun$schemaFor$1.apply(ScalaReflection.scala:45)
at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244)
at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244)
at scala.collection.immutable.List.foreach(List.scala:318)
at scala.collection.TraversableLike$class.map(TraversableLike.scala:244)
at scala.collection.AbstractTraversable.map(Traversable.scala:105)
at org.apache.spark.sql.catalyst.ScalaReflection$.schemaFor(ScalaReflection.scala:45)
at org.apache.spark.sql.catalyst.ScalaReflection$.schemaFor(ScalaReflection.scala:38)
at org.apache.spark.sql.catalyst.ScalaReflection$.attributesFor(ScalaReflection.scala:32)
at org.apache.spark.sql.execution.ExistingRdd$.fromProductRdd(basicOperators.scala:128)
at org.apache.spark.sql.SQLContext.createSchemaRDD(SQLContext.scala:79)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:39)
at $iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:44)
at $iwC$$iwC$$iwC$$iwC.<init>(<console>:46)
at $iwC$$iwC$$iwC.<init>(<console>:48)
at $iwC$$iwC.<init>(<console>:50)
at $iwC.<init>(<console>:52)
at <init>(<console>:54)
at .<init>(<console>:58)
at .<clinit>(<console>)
at .<init>(<console>:7)
at .<clinit>(<console>)
at $print(<console>)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:601)
at org.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:777)
at org.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1045)
at org.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:614)
at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:645)
at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:609)
at org.apache.spark.repl.SparkILoop.reallyInterpret$1(SparkILoop.scala:795)
at org.apache.spark.repl.SparkILoop.interpretStartingWith(SparkILoop.scala:840)
at org.apache.spark.repl.SparkILoop.command(SparkILoop.scala:752)
at org.apache.spark.repl.SparkILoop.processLine$1(SparkILoop.scala:600)
at org.apache.spark.repl.SparkILoop.innerLoop$1(SparkILoop.scala:607)
at org.apache.spark.repl.SparkILoop.loop(SparkILoop.scala:610)
at org.apache.spark.repl.SparkILoop$$anonfun$process$1.apply$mcZ$sp(SparkILoop.scala:935)
at org.apache.spark.repl.SparkILoop$$anonfun$process$1.apply(SparkILoop.scala:883)
at org.apache.spark.repl.SparkILoop$$anonfun$process$1.apply(SparkILoop.scala:883)
at scala.tools.nsc.util.ScalaClassLoader$.savingContextLoader(ScalaClassLoader.scala:135)
at org.apache.spark.repl.SparkILoop.process(SparkILoop.scala:883)
at org.apache.spark.repl.SparkILoop.process(SparkILoop.scala:981)
at org.apache.spark.repl.Main$.main(Main.scala:31)
at org.apache.spark.repl.Main.main(Main.scala)
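
A minimal sketch of the working Double variant (assuming jl is built from the
same data as above; the comment is mine, not REPL output):

scala> case class JournalLine(account: String, credit: Double, debit: Double, date: String, company: String, currency: String, costcenter: String, region: String)
defined class JournalLine
scala> jl.where('credit > 0).foreach(println)  // with Double this runs without the MatchError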

Thanks,

Re: SparkSQL "where" with BigDecimal type gives stacktrace

Posted by Michael Armbrust <mi...@databricks.com>.
This was not intentional; here is a JIRA:
https://issues.apache.org/jira/browse/SPARK-1364

Note that you can create big decimals by using the Decimal type in a
HiveContext.
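
A minimal sketch of that route, assuming a Hive-enabled build (the hql entry
point and the DECIMAL column type are my assumptions about the 1.0-era API,
not quoted from this reply):

val hiveContext = new org.apache.spark.sql.hive.HiveContext(sc)
// DECIMAL in the Hive DDL should map to Catalyst's decimal type
hiveContext.hql("CREATE TABLE journal (account STRING, credit DECIMAL, debit DECIMAL)")
hiveContext.hql("SELECT account, credit FROM journal WHERE credit > 0").collect().foreach(println)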

Date is not yet a supported data type.
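
A minimal workaround sketch for dates in the meantime, assuming you control
the case class: carry the value as a String or an epoch-millisecond Long and
convert at the edges.

case class JournalLine(account: String, credit: Double, dateMillis: Long) // epoch millis instead of a Date field
val line = JournalLine("cash", 100.0, System.currentTimeMillis)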


On Sun, Mar 30, 2014 at 5:35 PM, Manoj Samel <ma...@gmail.com> wrote:

> Hi,
>
> Would the same issue be present for other Java types like Date?
>
> Converting the person/teenager example on Patrick's page reproduces the
> problem ...
>
> Thanks,

Re: SparkSQL "where" with BigDecimal type gives stacktrace

Posted by Manoj Samel <ma...@gmail.com>.
Hi,

Would the same issue be present for other Java types like Date?

Converting the person/teenager example on Patrick's page reproduces the
problem ...

Thanks,


scala> import scala.math
import scala.math

scala> case class Person(name: String, age: BigDecimal)
defined class Person

scala> val people = sc.textFile("/data/spark/examples/src/main/resources/people.txt").map(_.split(",")).map(p => Person(p(0), BigDecimal(p(1).trim.toInt)))
14/03/31 00:23:40 INFO MemoryStore: ensureFreeSpace(32960) called with curMem=0, maxMem=308713881
14/03/31 00:23:40 INFO MemoryStore: Block broadcast_0 stored as values to memory (estimated size 32.2 KB, free 294.4 MB)
people: org.apache.spark.rdd.RDD[Person] = MappedRDD[3] at map at <console>:20

scala> people take 1
...

scala> val t = people.where('age > 12 )
scala.MatchError: scala.BigDecimal (of class scala.reflect.internal.Types$TypeRef$$anon$3)
at org.apache.spark.sql.catalyst.ScalaReflection$.schemaFor(ScalaReflection.scala:41)
at org.apache.spark.sql.catalyst.ScalaReflection$$anonfun$schemaFor$1.apply(ScalaReflection.scala:45)
at org.apache.spark.sql.catalyst.ScalaReflection$$anonfun$schemaFor$1.apply(ScalaReflection.scala:45)
at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244)
at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244)
at scala.collection.immutable.List.foreach(List.scala:318)
at scala.collection.TraversableLike$class.map(TraversableLike.scala:244)
at scala.collection.AbstractTraversable.map(Traversable.scala:105)
at org.apache.spark.sql.catalyst.ScalaReflection$.schemaFor(ScalaReflection.scala:45)
at org.apache.spark.sql.catalyst.ScalaReflection$.schemaFor(ScalaReflection.scala:38)
at org.apache.spark.sql.catalyst.ScalaReflection$.attributesFor(ScalaReflection.scala:32)
at org.apache.spark.sql.execution.ExistingRdd$.fromProductRdd(basicOperators.scala:128)
at org.apache.spark.sql.SQLContext.createSchemaRDD(SQLContext.scala:79)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:22)
at $iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:27)
at $iwC$$iwC$$iwC$$iwC.<init>(<console>:29)
at $iwC$$iwC$$iwC.<init>(<console>:31)
at $iwC$$iwC.<init>(<console>:33)
at $iwC.<init>(<console>:35)
at <init>(<console>:37)
at .<init>(<console>:41)
at .<clinit>(<console>)
at .<init>(<console>:7)
at .<clinit>(<console>)
at $print(<console>)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:601)
at org.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:777)
at org.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1045)
at org.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:614)
at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:645)
at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:609)
at org.apache.spark.repl.SparkILoop.reallyInterpret$1(SparkILoop.scala:795)
at org.apache.spark.repl.SparkILoop.interpretStartingWith(SparkILoop.scala:840)
at org.apache.spark.repl.SparkILoop.command(SparkILoop.scala:752)
at org.apache.spark.repl.SparkILoop.processLine$1(SparkILoop.scala:600)
at org.apache.spark.repl.SparkILoop.innerLoop$1(SparkILoop.scala:607)
at org.apache.spark.repl.SparkILoop.loop(SparkILoop.scala:610)
at org.apache.spark.repl.SparkILoop$$anonfun$process$1.apply$mcZ$sp(SparkILoop.scala:935)
at org.apache.spark.repl.SparkILoop$$anonfun$process$1.apply(SparkILoop.scala:883)
at org.apache.spark.repl.SparkILoop$$anonfun$process$1.apply(SparkILoop.scala:883)
at scala.tools.nsc.util.ScalaClassLoader$.savingContextLoader(ScalaClassLoader.scala:135)
at org.apache.spark.repl.SparkILoop.process(SparkILoop.scala:883)
at org.apache.spark.repl.SparkILoop.process(SparkILoop.scala:981)
at org.apache.spark.repl.Main$.main(Main.scala:31)
at org.apache.spark.repl.Main.main(Main.scala)



On Sun, Mar 30, 2014 at 11:04 AM, Aaron Davidson <il...@gmail.com> wrote:

> Well, the error is coming from this case statement not matching on the
> BigDecimal type:
> https://github.com/apache/spark/blob/master/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala#L41
>
> This seems to be a bug because there is a corresponding Catalyst DataType
> for BigDecimal, just no way to produce a schema for it. A patch should be
> straightforward enough to match against typeOf[BigDecimal] assuming this
> was not for some reason intentional.

Re: SparkSQL "where" with BigDecimal type gives stacktrace

Posted by Aaron Davidson <il...@gmail.com>.
Well, the error is coming from this case statement not matching on the
BigDecimal type:
https://github.com/apache/spark/blob/master/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala#L41

This seems to be a bug because there is a corresponding Catalyst DataType
for BigDecimal, just no way to produce a schema for it. A patch should be
straightforward enough to match against typeOf[BigDecimal] assuming this
was not for some reason intentional.
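
A minimal sketch of such a patch, assuming DecimalType is the corresponding
Catalyst type (the surrounding cases are paraphrased from the linked
schemaFor match, not quoted verbatim):

def schemaFor(tpe: Type): DataType = tpe match {
  // ... existing cases: Option[_], Product, Seq[_], etc. ...
  case t if t <:< typeOf[String] => StringType
  // proposed new case, per the suggestion above:
  case t if t <:< typeOf[BigDecimal] => DecimalType
  // ... remaining primitive cases ...
}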


On Sun, Mar 30, 2014 at 10:43 AM, smallmonkey491@hotmail.com <smallmonkey491@hotmail.com> wrote:

> Can I get the whole operation? Then I can try to locate the error.

Re: SparkSQL "where" with BigDecimal type gives stacktrace

Posted by "smallmonkey491@hotmail.com" <sm...@hotmail.com>.
Can I get the whole operation? Then I can try to locate the error.

smallmonkey491@hotmail.com
