You are viewing a plain text version of this content. The canonical link for it is here.
Posted to user@spark.apache.org by jggg777 <jo...@gmail.com> on 2016/11/21 17:23:03 UTC

Pasting into spark-shell doesn't work for Databricks example

I'm simply pasting in the UDAF example from this page and getting errors
(basic EMR setup with Spark 2.0):
https://docs.cloud.databricks.com/docs/latest/databricks_guide/index.html#04%20SQL,%20DataFrames%20%26%20Datasets/03%20UDF%20and%20UDAF%20-%20scala.html

The imports appear to work, but then I see errors like "not found: type
UserDefinedAggregateFunction".  

If I run ":paste" and paste it in that way it does work, but I'm interested
in knowing why Ctrl-V doesn't.  What is happening under the hood which makes
it seem like the imports are working even though they aren't?  And is there
a way to fix this in general?

>>>
scala> import org.apache.spark.sql.expressions.MutableAggregationBuffer
import org.apache.spark.sql.expressions.MutableAggregationBuffer

scala> import org.apache.spark.sql.expressions.UserDefinedAggregateFunction
import org.apache.spark.sql.expressions.UserDefinedAggregateFunction

scala> import org.apache.spark.sql.Row
import org.apache.spark.sql.Row

scala> import org.apache.spark.sql.types._
import org.apache.spark.sql.types._

scala>

scala> class GeometricMean extends UserDefinedAggregateFunction {
     |   // This is the input fields for your aggregate function.
     |   override def inputSchema: org.apache.spark.sql.types.StructType =
     |     StructType(StructField("value", DoubleType) :: Nil)
     |
     |   // This is the internal fields you keep for computing your
aggregate.
     |   override def bufferSchema: StructType = StructType(
     |     StructField("count", LongType) ::
     |     StructField("product", DoubleType) :: Nil
     |   )
     |
     |   // This is the output type of your aggregatation function.
     |   override def dataType: DataType = DoubleType
     |
     |   override def deterministic: Boolean = true
     |
     |   // This is the initial value for your buffer schema.
     |   override def initialize(buffer: MutableAggregationBuffer): Unit = {
     |     buffer(0) = 0L
     |     buffer(1) = 1.0
     |   }
     |
     |   // This is how to update your buffer schema given an input.
     |   override def update(buffer: MutableAggregationBuffer, input: Row):
Unit = {
     |     buffer(0) = buffer.getAs[Long](0) + 1
     |     buffer(1) = buffer.getAs[Double](1) * input.getAs[Double](0)
     |   }
     |
     |   // This is how to merge two objects with the bufferSchema type.
     |   override def merge(buffer1: MutableAggregationBuffer, buffer2:
Row): Unit = {
     |     buffer1(0) = buffer1.getAs[Long](0) + buffer2.getAs[Long](0)
     |     buffer1(1) = buffer1.getAs[Double](1) * buffer2.getAs[Double](1)
     |   }
     |
     |   // This is where you output the final value, given the final value
of your bufferSchema.
     |   override def evaluate(buffer: Row): Any = {
     |     math.pow(buffer.getDouble(1), 1.toDouble / buffer.getLong(0))
     |   }
     | }
<console>:11: error: not found: type UserDefinedAggregateFunction
       class GeometricMean extends UserDefinedAggregateFunction {
                                   ^
<console>:14: error: not found: value StructType
           StructType(StructField("value", DoubleType) :: Nil)
           ^
<console>:14: error: not found: value StructField
           StructType(StructField("value", DoubleType) :: Nil)
                      ^
<console>:14: error: not found: value DoubleType
           StructType(StructField("value", DoubleType) :: Nil)
                                           ^
<console>:17: error: not found: type StructType
         override def bufferSchema: StructType = StructType(
                                    ^
<console>:17: error: not found: value StructType
         override def bufferSchema: StructType = StructType(
                                                 ^
<console>:18: error: not found: value StructField
           StructField("count", LongType) ::
           ^
<console>:18: error: not found: value LongType
           StructField("count", LongType) ::
                                ^
<console>:19: error: not found: value StructField
           StructField("product", DoubleType) :: Nil
           ^
<console>:19: error: not found: value DoubleType
           StructField("product", DoubleType) :: Nil
                                  ^
<console>:23: error: not found: type DataType
         override def dataType: DataType = DoubleType
                                ^
<console>:23: error: not found: value DoubleType
         override def dataType: DataType = DoubleType
                                           ^
<console>:28: error: not found: type MutableAggregationBuffer
         override def initialize(buffer: MutableAggregationBuffer): Unit = {
                                         ^
<console>:34: error: not found: type MutableAggregationBuffer
         override def update(buffer: MutableAggregationBuffer, input: Row):
Unit = {
                                     ^
<console>:34: error: not found: type Row
         override def update(buffer: MutableAggregationBuffer, input: Row):
Unit = {
                                                                      ^
<console>:40: error: not found: type MutableAggregationBuffer
         override def merge(buffer1: MutableAggregationBuffer, buffer2:
Row): Unit = {
                                     ^
<console>:40: error: not found: type Row
         override def merge(buffer1: MutableAggregationBuffer, buffer2:
Row): Unit = {
                                                                        ^
<console>:46: error: not found: type Row
         override def evaluate(buffer: Row): Any = {
                                       ^



--
View this message in context: http://apache-spark-user-list.1001560.n3.nabble.com/Pasting-into-spark-shell-doesn-t-work-for-Databricks-example-tp28113.html
Sent from the Apache Spark User List mailing list archive at Nabble.com.

---------------------------------------------------------------------
To unsubscribe e-mail: user-unsubscribe@spark.apache.org


Re: Pasting into spark-shell doesn't work for Databricks example

Posted by "Shixiong(Ryan) Zhu" <sh...@databricks.com>.
Scala has not yet resolved this issue. Once they fix and release a new
version, you can just upgrade the Scala version by yourself.

On Tue, Nov 22, 2016 at 10:58 PM, Denis Bolshakov <bolshakov.denis@gmail.com
> wrote:

> Hello Zhu,
>
> Thank you very much for such detailed explanation and providing
> workaround, it works fine.
>
> But since the problem is related to scala issue can we expect the fix in
> Spark 2.0? Or it's not a good idea to update such important dependency as
> scala in minor maintenance release?
>
> Kind regards,
> Denis
>
> On 22 November 2016 at 22:13, Shixiong(Ryan) Zhu <sh...@databricks.com>
> wrote:
>
>> The workaround is defining the imports and class together using ":paste".
>>
>> On Tue, Nov 22, 2016 at 11:12 AM, Shixiong(Ryan) Zhu <
>> shixiong@databricks.com> wrote:
>>
>>> This relates to a known issue: https://issues.apache.o
>>> rg/jira/browse/SPARK-14146 and https://issues.scala-lang.
>>> org/browse/SI-9799
>>>
>>> On Tue, Nov 22, 2016 at 6:37 AM, dbolshak <bo...@gmail.com>
>>> wrote:
>>>
>>>> Hello,
>>>>
>>>> We have the same issue,
>>>>
>>>> We use latest release 2.0.2.
>>>>
>>>> Setup with 1.6.1 works fine.
>>>>
>>>> Could somebody provide a workaround how to fix that?
>>>>
>>>> Kind regards,
>>>> Denis
>>>>
>>>>
>>>>
>>>> --
>>>> View this message in context: http://apache-spark-user-list.
>>>> 1001560.n3.nabble.com/Pasting-into-spark-shell-doesn-t-work-
>>>> for-Databricks-example-tp28113p28116.html
>>>> Sent from the Apache Spark User List mailing list archive at Nabble.com.
>>>>
>>>> ---------------------------------------------------------------------
>>>> To unsubscribe e-mail: user-unsubscribe@spark.apache.org
>>>>
>>>>
>>>
>>
>
>
> --
> //with Best Regards
> --Denis Bolshakov
> e-mail: bolshakov.denis@gmail.com
>

Re: Pasting into spark-shell doesn't work for Databricks example

Posted by Denis Bolshakov <bo...@gmail.com>.
Hello Zhu,

Thank you very much for such detailed explanation and providing workaround,
it works fine.

But since the problem is related to scala issue can we expect the fix in
Spark 2.0? Or it's not a good idea to update such important dependency as
scala in minor maintenance release?

Kind regards,
Denis

On 22 November 2016 at 22:13, Shixiong(Ryan) Zhu <sh...@databricks.com>
wrote:

> The workaround is defining the imports and class together using ":paste".
>
> On Tue, Nov 22, 2016 at 11:12 AM, Shixiong(Ryan) Zhu <
> shixiong@databricks.com> wrote:
>
>> This relates to a known issue: https://issues.apache.o
>> rg/jira/browse/SPARK-14146 and https://issues.scala-lang.
>> org/browse/SI-9799
>>
>> On Tue, Nov 22, 2016 at 6:37 AM, dbolshak <bo...@gmail.com>
>> wrote:
>>
>>> Hello,
>>>
>>> We have the same issue,
>>>
>>> We use latest release 2.0.2.
>>>
>>> Setup with 1.6.1 works fine.
>>>
>>> Could somebody provide a workaround how to fix that?
>>>
>>> Kind regards,
>>> Denis
>>>
>>>
>>>
>>> --
>>> View this message in context: http://apache-spark-user-list.
>>> 1001560.n3.nabble.com/Pasting-into-spark-shell-doesn-t-work-
>>> for-Databricks-example-tp28113p28116.html
>>> Sent from the Apache Spark User List mailing list archive at Nabble.com.
>>>
>>> ---------------------------------------------------------------------
>>> To unsubscribe e-mail: user-unsubscribe@spark.apache.org
>>>
>>>
>>
>


-- 
//with Best Regards
--Denis Bolshakov
e-mail: bolshakov.denis@gmail.com

Re: Pasting into spark-shell doesn't work for Databricks example

Posted by "Shixiong(Ryan) Zhu" <sh...@databricks.com>.
The workaround is defining the imports and class together using ":paste".

On Tue, Nov 22, 2016 at 11:12 AM, Shixiong(Ryan) Zhu <
shixiong@databricks.com> wrote:

> This relates to a known issue: https://issues.apache.
> org/jira/browse/SPARK-14146 and https://issues.scala-lang.
> org/browse/SI-9799
>
> On Tue, Nov 22, 2016 at 6:37 AM, dbolshak <bo...@gmail.com>
> wrote:
>
>> Hello,
>>
>> We have the same issue,
>>
>> We use latest release 2.0.2.
>>
>> Setup with 1.6.1 works fine.
>>
>> Could somebody provide a workaround how to fix that?
>>
>> Kind regards,
>> Denis
>>
>>
>>
>> --
>> View this message in context: http://apache-spark-user-list.
>> 1001560.n3.nabble.com/Pasting-into-spark-shell-doesn-t-work-
>> for-Databricks-example-tp28113p28116.html
>> Sent from the Apache Spark User List mailing list archive at Nabble.com.
>>
>> ---------------------------------------------------------------------
>> To unsubscribe e-mail: user-unsubscribe@spark.apache.org
>>
>>
>

Re: Pasting into spark-shell doesn't work for Databricks example

Posted by "Shixiong(Ryan) Zhu" <sh...@databricks.com>.
This relates to a known issue:
https://issues.apache.org/jira/browse/SPARK-14146 and
https://issues.scala-lang.org/browse/SI-9799

On Tue, Nov 22, 2016 at 6:37 AM, dbolshak <bo...@gmail.com> wrote:

> Hello,
>
> We have the same issue,
>
> We use latest release 2.0.2.
>
> Setup with 1.6.1 works fine.
>
> Could somebody provide a workaround how to fix that?
>
> Kind regards,
> Denis
>
>
>
> --
> View this message in context: http://apache-spark-user-list.
> 1001560.n3.nabble.com/Pasting-into-spark-shell-doesn-t-work-
> for-Databricks-example-tp28113p28116.html
> Sent from the Apache Spark User List mailing list archive at Nabble.com.
>
> ---------------------------------------------------------------------
> To unsubscribe e-mail: user-unsubscribe@spark.apache.org
>
>

Re: Pasting into spark-shell doesn't work for Databricks example

Posted by dbolshak <bo...@gmail.com>.
Hello,

We have the same issue,

We use latest release 2.0.2.

Setup with 1.6.1 works fine.

Could somebody provide a workaround how to fix that?

Kind regards,
Denis



--
View this message in context: http://apache-spark-user-list.1001560.n3.nabble.com/Pasting-into-spark-shell-doesn-t-work-for-Databricks-example-tp28113p28116.html
Sent from the Apache Spark User List mailing list archive at Nabble.com.

---------------------------------------------------------------------
To unsubscribe e-mail: user-unsubscribe@spark.apache.org


Re: Pasting into spark-shell doesn't work for Databricks example

Posted by Denis Bolshakov <bo...@gmail.com>.
Hello,

We have the same issue,

We use latest release 2.0.2.

Setup with 1.6.1 works fine.

Could somebody provide a workaround how to fix that?

Kind regards,
Denis

On 21 November 2016 at 20:23, jggg777 <jo...@gmail.com> wrote:

> I'm simply pasting in the UDAF example from this page and getting errors
> (basic EMR setup with Spark 2.0):
> https://docs.cloud.databricks.com/docs/latest/databricks_
> guide/index.html#04%20SQL,%20DataFrames%20%26%20Datasets/
> 03%20UDF%20and%20UDAF%20-%20scala.html
>
> The imports appear to work, but then I see errors like "not found: type
> UserDefinedAggregateFunction".
>
> If I run ":paste" and paste it in that way it does work, but I'm interested
> in knowing why Ctrl-V doesn't.  What is happening under the hood which
> makes
> it seem like the imports are working even though they aren't?  And is there
> a way to fix this in general?
>
> >>>
> scala> import org.apache.spark.sql.expressions.MutableAggregationBuffer
> import org.apache.spark.sql.expressions.MutableAggregationBuffer
>
> scala> import org.apache.spark.sql.expressions.
> UserDefinedAggregateFunction
> import org.apache.spark.sql.expressions.UserDefinedAggregateFunction
>
> scala> import org.apache.spark.sql.Row
> import org.apache.spark.sql.Row
>
> scala> import org.apache.spark.sql.types._
> import org.apache.spark.sql.types._
>
> scala>
>
> scala> class GeometricMean extends UserDefinedAggregateFunction {
>      |   // This is the input fields for your aggregate function.
>      |   override def inputSchema: org.apache.spark.sql.types.StructType =
>      |     StructType(StructField("value", DoubleType) :: Nil)
>      |
>      |   // This is the internal fields you keep for computing your
> aggregate.
>      |   override def bufferSchema: StructType = StructType(
>      |     StructField("count", LongType) ::
>      |     StructField("product", DoubleType) :: Nil
>      |   )
>      |
>      |   // This is the output type of your aggregatation function.
>      |   override def dataType: DataType = DoubleType
>      |
>      |   override def deterministic: Boolean = true
>      |
>      |   // This is the initial value for your buffer schema.
>      |   override def initialize(buffer: MutableAggregationBuffer): Unit =
> {
>      |     buffer(0) = 0L
>      |     buffer(1) = 1.0
>      |   }
>      |
>      |   // This is how to update your buffer schema given an input.
>      |   override def update(buffer: MutableAggregationBuffer, input: Row):
> Unit = {
>      |     buffer(0) = buffer.getAs[Long](0) + 1
>      |     buffer(1) = buffer.getAs[Double](1) * input.getAs[Double](0)
>      |   }
>      |
>      |   // This is how to merge two objects with the bufferSchema type.
>      |   override def merge(buffer1: MutableAggregationBuffer, buffer2:
> Row): Unit = {
>      |     buffer1(0) = buffer1.getAs[Long](0) + buffer2.getAs[Long](0)
>      |     buffer1(1) = buffer1.getAs[Double](1) * buffer2.getAs[Double](1)
>      |   }
>      |
>      |   // This is where you output the final value, given the final value
> of your bufferSchema.
>      |   override def evaluate(buffer: Row): Any = {
>      |     math.pow(buffer.getDouble(1), 1.toDouble / buffer.getLong(0))
>      |   }
>      | }
> <console>:11: error: not found: type UserDefinedAggregateFunction
>        class GeometricMean extends UserDefinedAggregateFunction {
>                                    ^
> <console>:14: error: not found: value StructType
>            StructType(StructField("value", DoubleType) :: Nil)
>            ^
> <console>:14: error: not found: value StructField
>            StructType(StructField("value", DoubleType) :: Nil)
>                       ^
> <console>:14: error: not found: value DoubleType
>            StructType(StructField("value", DoubleType) :: Nil)
>                                            ^
> <console>:17: error: not found: type StructType
>          override def bufferSchema: StructType = StructType(
>                                     ^
> <console>:17: error: not found: value StructType
>          override def bufferSchema: StructType = StructType(
>                                                  ^
> <console>:18: error: not found: value StructField
>            StructField("count", LongType) ::
>            ^
> <console>:18: error: not found: value LongType
>            StructField("count", LongType) ::
>                                 ^
> <console>:19: error: not found: value StructField
>            StructField("product", DoubleType) :: Nil
>            ^
> <console>:19: error: not found: value DoubleType
>            StructField("product", DoubleType) :: Nil
>                                   ^
> <console>:23: error: not found: type DataType
>          override def dataType: DataType = DoubleType
>                                 ^
> <console>:23: error: not found: value DoubleType
>          override def dataType: DataType = DoubleType
>                                            ^
> <console>:28: error: not found: type MutableAggregationBuffer
>          override def initialize(buffer: MutableAggregationBuffer): Unit =
> {
>                                          ^
> <console>:34: error: not found: type MutableAggregationBuffer
>          override def update(buffer: MutableAggregationBuffer, input: Row):
> Unit = {
>                                      ^
> <console>:34: error: not found: type Row
>          override def update(buffer: MutableAggregationBuffer, input: Row):
> Unit = {
>                                                                       ^
> <console>:40: error: not found: type MutableAggregationBuffer
>          override def merge(buffer1: MutableAggregationBuffer, buffer2:
> Row): Unit = {
>                                      ^
> <console>:40: error: not found: type Row
>          override def merge(buffer1: MutableAggregationBuffer, buffer2:
> Row): Unit = {
>                                                                         ^
> <console>:46: error: not found: type Row
>          override def evaluate(buffer: Row): Any = {
>                                        ^
>
>
>
> --
> View this message in context: http://apache-spark-user-list.
> 1001560.n3.nabble.com/Pasting-into-spark-shell-doesn-t-work-
> for-Databricks-example-tp28113.html
> Sent from the Apache Spark User List mailing list archive at Nabble.com.
>
> ---------------------------------------------------------------------
> To unsubscribe e-mail: user-unsubscribe@spark.apache.org
>
>


-- 
//with Best Regards
--Denis Bolshakov
e-mail: bolshakov.denis@gmail.com