Posted to issues@spark.apache.org by "Mateusz Michalowski (JIRA)" <ji...@apache.org> on 2015/07/17 14:32:04 UTC
[jira] [Updated] (SPARK-9135) Filter fails when filtering with a method reference to overloaded method
[ https://issues.apache.org/jira/browse/SPARK-9135?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Mateusz Michalowski updated SPARK-9135:
---------------------------------------
Description:
Filter fails when filtering with a method reference to an overridden method.
In the example below we filter by {{Fruit::isRed}}, which is overridden by {{Apple::isRed}} and {{Banana::isRed}}.
If we call {{apples.filter(Fruit::isRed)}} and then {{bananas.filter(Fruit::isRed)}} (or {{fruit.filter(Fruit::isRed)}}), Spark ends up applying the Apple-typed predicate to Banana elements and throws a {{java.lang.ClassCastException}} as a result.
No exception occurs if we use a lambda instead of a method reference:
{code}
.filter(f -> f.isRed())
{code}
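A plausible explanation (my reading, not confirmed): each lambda expression compiles to its own synthetic implementation method, so the serialized forms stay distinct, whereas the two {{Fruit::isRed}} references both point at {{Fruit.isRed}} and differ only in their instantiated method type, which the compiler-generated {{$deserializeLambda$}} dispatcher may fail to check. Under that assumption, explicitly typed lambdas also work around the problem; a minimal sketch, reusing the {{Apple}}, {{Banana}}, {{toRdd}} and RDD pieces from the test below (the variable names are mine, not from the original report):
{code:java}
import org.apache.spark.api.java.function.Function;

// Hedged workaround sketch: one explicitly typed, serializable lambda per
// element type. Each lambda body compiles to a distinct synthetic method,
// so deserialization on the executor cannot mix the two predicates up.
Function<Apple, Boolean> appleIsRed = apple -> apple.isRed();
Function<Banana, Boolean> bananaIsRed = banana -> banana.isRed();

long redAppleCount = appleRdd.filter(appleIsRed).count();    // expected: 1
long redBananaCount = bananaRdd.filter(bananaIsRed).count(); // expected: 0
{code}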
I attach a test setup below:
{code:java}
package com.doggybites;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

import java.io.Serializable;
import java.util.Arrays;

import static org.hamcrest.CoreMatchers.equalTo;
import static org.junit.Assert.assertThat;

public class SparkTest {
    static abstract class Fruit implements Serializable {
        abstract boolean isRed();
    }

    static class Banana extends Fruit {
        @Override
        boolean isRed() {
            return false;
        }
    }

    static class Apple extends Fruit {
        @Override
        boolean isRed() {
            return true;
        }
    }

    private JavaSparkContext sparkContext;

    @Before
    public void setUp() throws Exception {
        SparkConf sparkConf = new SparkConf().setAppName("test").setMaster("local[2]");
        sparkContext = new JavaSparkContext(sparkConf);
    }

    @After
    public void tearDown() throws Exception {
        sparkContext.stop();
    }

    private <T> JavaRDD<T> toRdd(T... array) {
        return sparkContext.parallelize(Arrays.asList(array));
    }

    @Test
    public void filters_apples_and_bananas_with_method_reference() {
        JavaRDD<Apple> appleRdd = toRdd(new Apple());
        JavaRDD<Banana> bananaRdd = toRdd(new Banana());

        long redAppleCount = appleRdd.filter(Fruit::isRed).count();
        long redBananaCount = bananaRdd.filter(Fruit::isRed).count();

        assertThat(redAppleCount, equalTo(1L));
        assertThat(redBananaCount, equalTo(0L));
    }
}
{code}
The test above throws:
{code}
15/07/17 14:10:04 ERROR Executor: Exception in task 1.0 in stage 1.0 (TID 3)
java.lang.ClassCastException: com.doggybites.SparkTest$Banana cannot be cast to com.doggybites.SparkTest$Apple
    at com.doggybites.SparkTest$$Lambda$2/976119300.call(Unknown Source)
    at org.apache.spark.api.java.JavaRDD$$anonfun$filter$1.apply(JavaRDD.scala:78)
    at org.apache.spark.api.java.JavaRDD$$anonfun$filter$1.apply(JavaRDD.scala:78)
    at scala.collection.Iterator$$anon$14.hasNext(Iterator.scala:390)
    at org.apache.spark.util.Utils$.getIteratorSize(Utils.scala:1626)
    at org.apache.spark.rdd.RDD$$anonfun$count$1.apply(RDD.scala:1099)
    at org.apache.spark.rdd.RDD$$anonfun$count$1.apply(RDD.scala:1099)
    at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:1767)
    at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:1767)
    at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:63)
    at org.apache.spark.scheduler.Task.run(Task.scala:70)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:213)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
    at java.lang.Thread.run(Thread.java:745)
15/07/17 14:10:04 WARN TaskSetManager: Lost task 1.0 in stage 1.0 (TID 3, localhost): java.lang.ClassCastException: com.doggybites.SparkTest$Banana cannot be cast to com.doggybites.SparkTest$Apple
    at com.doggybites.SparkTest$$Lambda$2/976119300.call(Unknown Source)
    at org.apache.spark.api.java.JavaRDD$$anonfun$filter$1.apply(JavaRDD.scala:78)
    at org.apache.spark.api.java.JavaRDD$$anonfun$filter$1.apply(JavaRDD.scala:78)
    at scala.collection.Iterator$$anon$14.hasNext(Iterator.scala:390)
    at org.apache.spark.util.Utils$.getIteratorSize(Utils.scala:1626)
    at org.apache.spark.rdd.RDD$$anonfun$count$1.apply(RDD.scala:1099)
    at org.apache.spark.rdd.RDD$$anonfun$count$1.apply(RDD.scala:1099)
    at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:1767)
    at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:1767)
    at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:63)
    at org.apache.spark.scheduler.Task.run(Task.scala:70)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:213)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
    at java.lang.Thread.run(Thread.java:745)
{code}
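For what it's worth, this should be checkable outside Spark: Java serialization of a method reference goes through {{java.lang.invoke.SerializedLambda}}, whose {{readResolve}} calls back into the capturing class's compiler-generated {{$deserializeLambda$}} method - the suspect here. A speculative, Spark-free sketch (class and helper names are mine; whether it actually throws depends on the compiler/JDK used to build it):
{code:java}
package com.doggybites;

import java.io.*;
import java.util.function.Function;

// Speculative diagnostic: round-trip both method references through Java
// serialization, as Spark does when shipping closures to executors. If the
// generated $deserializeLambda$ distinguishes the two references only by
// implementation method (Fruit.isRed for both) and ignores the instantiated
// method type, the second round-trip yields the Apple-typed lambda and the
// apply call throws the same ClassCastException as the Spark job above.
public class LambdaRoundTrip {

    interface SerFunction<T, R> extends Function<T, R>, Serializable {}

    @SuppressWarnings("unchecked")
    static <T, R> SerFunction<T, R> roundTrip(SerFunction<T, R> f) throws Exception {
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        try (ObjectOutputStream out = new ObjectOutputStream(bytes)) {
            out.writeObject(f);
        }
        try (ObjectInputStream in = new ObjectInputStream(
                new ByteArrayInputStream(bytes.toByteArray()))) {
            return (SerFunction<T, R>) in.readObject();
        }
    }

    public static void main(String[] args) throws Exception {
        SerFunction<SparkTest.Apple, Boolean> appleIsRed = SparkTest.Fruit::isRed;
        SerFunction<SparkTest.Banana, Boolean> bananaIsRed = SparkTest.Fruit::isRed;

        System.out.println(roundTrip(appleIsRed).apply(new SparkTest.Apple()));   // expect: true
        System.out.println(roundTrip(bananaIsRed).apply(new SparkTest.Banana())); // may throw ClassCastException
    }
}
{code}
If this round-trip prints {{true}} and {{false}} cleanly, the compiler in use disambiguates the two references correctly and the problem lies elsewhere in how Spark ships the closure.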
> Filter fails when filtering with a method reference to overloaded method
> ------------------------------------------------------------------------
>
> Key: SPARK-9135
> URL: https://issues.apache.org/jira/browse/SPARK-9135
> Project: Spark
> Issue Type: Bug
> Components: Java API
> Affects Versions: 1.4.0
> Reporter: Mateusz Michalowski
>