You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@spark.apache.org by "Michael Malak (JIRA)" <ji...@apache.org> on 2014/05/15 00:32:23 UTC
[jira] [Updated] (SPARK-1836) REPL $outer type mismatch causes
lookup() and equals() problems
[ https://issues.apache.org/jira/browse/SPARK-1836?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Michael Malak updated SPARK-1836:
---------------------------------
Description:
Anand Avati partially traced the cause to REPL wrapping classes in $outer classes. There are at least two major symptoms:
1. equals()
=========
In the REPL, equals() (required in custom classes used as a key for groupByKey) seems to have to be written using isInstanceOf[] instead of the canonical match{}
Spark Shell (equals uses match{}):
{noformat}
class C(val s:String) extends Serializable {
override def equals(o: Any) = o match {
case that: C => that.s == s
case _ => false
}
}
val x = new C("a")
val bos = new java.io.ByteArrayOutputStream()
val out = new java.io.ObjectOutputStream(bos)
out.writeObject(x);
val b = bos.toByteArray();
out.close
bos.close
val y = new java.io.ObjectInputStream(new java.io.ByteArrayInputStream(b)).readObject().asInstanceOf[C]
x.equals(y)
res: Boolean = false
{noformat}
Spark Shell (equals uses isInstanceOf[]):
{noformat}
class C(val s:String) extends Serializable {
override def equals(o: Any) = if (o.isInstanceOf[C]) (o.asInstanceOf[C].s == s) else false
}
val x = new C("a")
val bos = new java.io.ByteArrayOutputStream()
val out = new java.io.ObjectOutputStream(bos)
out.writeObject(x);
val b = bos.toByteArray();
out.close
bos.close
val y = new java.io.ObjectInputStream(new java.io.ByteArrayInputStream(b)).readObject().asInstanceOf[C]
x.equals(y)
res: Boolean = true
{noformat}
Scala Shell (equals uses match{}):
{noformat}
class C(val s:String) extends Serializable {
override def equals(o: Any) = o match {
case that: C => that.s == s
case _ => false
}
}
val x = new C("a")
val bos = new java.io.ByteArrayOutputStream()
val out = new java.io.ObjectOutputStream(bos)
out.writeObject(x);
val b = bos.toByteArray();
out.close
bos.close
val y = new java.io.ObjectInputStream(new java.io.ByteArrayInputStream(b)).readObject().asInstanceOf[C]
x.equals(y)
res: Boolean = true
{noformat}
2. lookup()
=========
{noformat}
class C(val s:String) extends Serializable {
override def equals(o: Any) = if (o.isInstanceOf[C]) o.asInstanceOf[C].s == s else false
override def hashCode = s.hashCode
override def toString = s
}
val r = sc.parallelize(Array((new C("a"),11),(new C("a"),12)))
r.lookup(new C("a"))
<console>:17: error: type mismatch;
found : C
required: C
r.lookup(new C("a"))
^
{noformat}
See
http://mail-archives.apache.org/mod_mbox/spark-dev/201405.mbox/%3C1400019424.80629.YahooMailNeo%40web160801.mail.bf1.yahoo.com%3E
was:
Anand Avati partially traced the cause to REPL wrapping classes in $outer classes. There are at least two major symptoms:
1. equals()
=========
In the REPL, equals() (required in custom classes used as a key for groupByKey) seems to have to be written using isInstanceOf[] instead of the canonical match{}
Spark Shell (equals uses match{}):
class C(val s:String) extends Serializable {
override def equals(o: Any) = o match {
case that: C => that.s == s
case _ => false
}
}
val x = new C("a")
val bos = new java.io.ByteArrayOutputStream()
val out = new java.io.ObjectOutputStream(bos)
out.writeObject(x);
val b = bos.toByteArray();
out.close
bos.close
val y = new java.io.ObjectInputStream(new java.io.ByteArrayInputStream(b)).readObject().asInstanceOf[C]
x.equals(y)
res: Boolean = false
Spark Shell (equals uses isInstanceOf[]):
class C(val s:String) extends Serializable {
override def equals(o: Any) = if (o.isInstanceOf[C]) (o.asInstanceOf[C].s == s) else false
}
val x = new C("a")
val bos = new java.io.ByteArrayOutputStream()
val out = new java.io.ObjectOutputStream(bos)
out.writeObject(x);
val b = bos.toByteArray();
out.close
bos.close
val y = new java.io.ObjectInputStream(new java.io.ByteArrayInputStream(b)).readObject().asInstanceOf[C]
x.equals(y)
res: Boolean = true
Scala Shell (equals uses match{}):
class C(val s:String) extends Serializable {
override def equals(o: Any) = o match {
case that: C => that.s == s
case _ => false
}
}
val x = new C("a")
val bos = new java.io.ByteArrayOutputStream()
val out = new java.io.ObjectOutputStream(bos)
out.writeObject(x);
val b = bos.toByteArray();
out.close
bos.close
val y = new java.io.ObjectInputStream(new java.io.ByteArrayInputStream(b)).readObject().asInstanceOf[C]
x.equals(y)
res: Boolean = true
2. lookup()
=========
class C(val s:String) extends Serializable {
override def equals(o: Any) = if (o.isInstanceOf[C]) o.asInstanceOf[C].s == s else false
override def hashCode = s.hashCode
override def toString = s
}
val r = sc.parallelize(Array((new C("a"),11),(new C("a"),12)))
r.lookup(new C("a"))
<console>:17: error: type mismatch;
found : C
required: C
r.lookup(new C("a"))
^
See
http://mail-archives.apache.org/mod_mbox/spark-dev/201405.mbox/%3C1400019424.80629.YahooMailNeo%40web160801.mail.bf1.yahoo.com%3E
> REPL $outer type mismatch causes lookup() and equals() problems
> ---------------------------------------------------------------
>
> Key: SPARK-1836
> URL: https://issues.apache.org/jira/browse/SPARK-1836
> Project: Spark
> Issue Type: Bug
> Affects Versions: 0.9.0
> Reporter: Michael Malak
>
> Anand Avati partially traced the cause to REPL wrapping classes in $outer classes. There are at least two major symptoms:
> 1. equals()
> =========
> In the REPL, equals() (required in custom classes used as a key for groupByKey) seems to have to be written using isInstanceOf[] instead of the canonical match{}
> Spark Shell (equals uses match{}):
> {noformat}
> class C(val s:String) extends Serializable {
> override def equals(o: Any) = o match {
> case that: C => that.s == s
> case _ => false
> }
> }
> val x = new C("a")
> val bos = new java.io.ByteArrayOutputStream()
> val out = new java.io.ObjectOutputStream(bos)
> out.writeObject(x);
> val b = bos.toByteArray();
> out.close
> bos.close
> val y = new java.io.ObjectInputStream(new java.io.ByteArrayInputStream(b)).readObject().asInstanceOf[C]
> x.equals(y)
> res: Boolean = false
> {noformat}
> Spark Shell (equals uses isInstanceOf[]):
> {noformat}
> class C(val s:String) extends Serializable {
> override def equals(o: Any) = if (o.isInstanceOf[C]) (o.asInstanceOf[C].s == s) else false
> }
> val x = new C("a")
> val bos = new java.io.ByteArrayOutputStream()
> val out = new java.io.ObjectOutputStream(bos)
> out.writeObject(x);
> val b = bos.toByteArray();
> out.close
> bos.close
> val y = new java.io.ObjectInputStream(new java.io.ByteArrayInputStream(b)).readObject().asInstanceOf[C]
> x.equals(y)
> res: Boolean = true
> {noformat}
> Scala Shell (equals uses match{}):
> {noformat}
> class C(val s:String) extends Serializable {
> override def equals(o: Any) = o match {
> case that: C => that.s == s
> case _ => false
> }
> }
> val x = new C("a")
> val bos = new java.io.ByteArrayOutputStream()
> val out = new java.io.ObjectOutputStream(bos)
> out.writeObject(x);
> val b = bos.toByteArray();
> out.close
> bos.close
> val y = new java.io.ObjectInputStream(new java.io.ByteArrayInputStream(b)).readObject().asInstanceOf[C]
> x.equals(y)
> res: Boolean = true
> {noformat}
> 2. lookup()
> =========
> {noformat}
> class C(val s:String) extends Serializable {
> override def equals(o: Any) = if (o.isInstanceOf[C]) o.asInstanceOf[C].s == s else false
> override def hashCode = s.hashCode
> override def toString = s
> }
> val r = sc.parallelize(Array((new C("a"),11),(new C("a"),12)))
> r.lookup(new C("a"))
> <console>:17: error: type mismatch;
> found : C
> required: C
> r.lookup(new C("a"))
> ^
> {noformat}
> See
> http://mail-archives.apache.org/mod_mbox/spark-dev/201405.mbox/%3C1400019424.80629.YahooMailNeo%40web160801.mail.bf1.yahoo.com%3E
--
This message was sent by Atlassian JIRA
(v6.2#6252)