You are viewing a plain text version of this content. The canonical link for it is here.
Posted to reviews@spark.apache.org by GitBox <gi...@apache.org> on 2020/02/10 11:37:42 UTC
[GitHub] [spark] zhengruifeng commented on issue #27523: avoid tuple
assignment because it will circumvent the transient tag
zhengruifeng commented on issue #27523: avoid tuple assignment because it will circumvent the transient tag
URL: https://github.com/apache/spark/pull/27523#issuecomment-584081959
I used the following code to check this issue:
env: Scala 2.13.1 (Java HotSpot(TM) 64-Bit Server VM, Java 1.8.0_241)
```scala
import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream}
/** Serialises `value` with standard Java serialization and returns the resulting bytes.
  *
  * @param value the object to write; it must be serialisable by `ObjectOutputStream`
  * @return the complete serialized form of `value`
  */
def serialise(value: Any): Array[Byte] = {
  val buffer = new ByteArrayOutputStream()
  val oos = new ObjectOutputStream(buffer)
  // Close the object stream even when writeObject throws (e.g. NotSerializableException).
  try oos.writeObject(value)
  finally oos.close()
  // Read the bytes only after close() so everything buffered by oos has been flushed.
  buffer.toByteArray
}
/** Reads a single object back from bytes produced by Java serialization.
  *
  * @param bytes a serialized object stream, e.g. the output of an `ObjectOutputStream`
  * @return the first object read from the stream, untyped; callers cast it as needed
  */
def deserialise(bytes: Array[Byte]): Any = {
  val ois = new ObjectInputStream(new ByteArrayInputStream(bytes))
  // Close the stream even when readObject throws (e.g. ClassNotFoundException).
  try ois.readObject()
  finally ois.close()
}
// Case A (baseline): a single `@transient lazy val`. The initializer prints "get a" each time
// it actually runs, so a second "get a" after the round trip means the value was recomputed.
class A extends Serializable { @transient lazy val a = {println("get a"); System.currentTimeMillis} }
val a = new A
// Force the lazy val on the original instance before serialising it.
a.a
val a2 = deserialise(serialise(a)).asInstanceOf[A]
// Access the field on the deserialized copy; a fresh "get a" indicates re-initialization.
a2.a
// In the recorded run below this is false: the copy computed its own timestamp.
a.a == a2.a
// Case B (the case under test): `@transient lazy val` written as a tuple pattern `(a, b)`.
// NOTE(review): presumably the compiler-generated holder for the tuple does not carry the
// @transient annotation — that is not visible in this script; only its effect is.
class B extends Serializable { @transient lazy val (a,b) = {println("get a & b"); val t = System.currentTimeMillis; (t, -t)} }
val b = new B
// Force the initializer on the original instance before serialising it.
b.a
val b2 = deserialise(serialise(b)).asInstanceOf[B]
// In the recorded run below this prints nothing: the copy already holds the value.
b2.a
// Both comparisons are true in the recorded run, i.e. the values survived serialization.
b.a == b2.a
b.b == b2.b
// Case C (proposed workaround): keep the tuple in one `@transient lazy val t` and expose its
// components through separate `@transient lazy val`s instead of a tuple pattern.
// The println message is the same "get a & b" text used in case B.
class C extends Serializable { @transient lazy val t = {println("get a & b"); val t = System.currentTimeMillis; (t, -t)}; @transient lazy val a = t._1; @transient lazy val b = t._2 }
val c = new C
// Force the initializer on the original instance before serialising it.
c.a
val c2 = deserialise(serialise(c)).asInstanceOf[C]
// In the recorded run below this prints "get a & b" again: the copy recomputed the tuple.
c2.a
// Both comparisons are false in the recorded run, matching the baseline behaviour of case A.
c.a == c2.a
c.b == c2.b
```
Result:
```scala
scala> class A extends Serializable { @transient lazy val a = {println("get a"); System.currentTimeMillis} }
defined class A
scala> val a = new A
a: A = A@68ef01a5
scala> a.a
get a
res0: Long = 1581333143300
scala> val a2 = deserialise(serialise(a)).asInstanceOf[A]
a2: A = A@f017dd0
scala> a2.a
get a
res1: Long = 1581333143523
scala> a.a == a2.a
res2: Boolean = false
scala>
scala> class B extends Serializable { @transient lazy val (a,b) = {println("get a & b"); val t = System.currentTimeMillis; (t, -t)} }
defined class B
scala> val b = new B
b: B = B@1d008e61
scala> b.a
get a & b
res3: Long = 1581333144022
scala> val b2 = deserialise(serialise(b)).asInstanceOf[B]
b2: B = B@6ab826bb
scala> b2.a
res4: Long = 1581333144022
scala> b.a == b2.a
res5: Boolean = true
scala> b.b == b2.b
res6: Boolean = true
scala>
scala> class C extends Serializable { @transient lazy val t = {println("get a & b"); val t = System.currentTimeMillis; (t, -t)}; @transient lazy val a = t._1; @transient lazy val b = t._2 }
defined class C
scala> val c = new C
c: C = C@7ec01440
scala> c.a
get a & b
res7: Long = 1581333144575
scala> val c2 = deserialise(serialise(c)).asInstanceOf[C]
c2: C = C@42a698bd
scala> c2.a
get a & b
res8: Long = 1581333144713
scala> c.a == c2.a
res9: Boolean = false
scala> c.b == c2.b
res10: Boolean = false
```
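We can see that `b2.a` does not trigger the `println`, and the fields `.a` and `.b` contain the same values after the serialization round trip, i.e. the tuple-assigned fields were serialized despite the `@transient` tag;
while `a2.a` and `c2.a` do trigger the `println`, and their fields change after the serialization round trip, as expected for `@transient lazy val`s.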
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
With regards,
Apache Git Services
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org