You are viewing a plain text version of this content. The canonical link for it is here.
Posted to reviews@spark.apache.org by GitBox <gi...@apache.org> on 2020/02/10 11:37:42 UTC

[GitHub] [spark] zhengruifeng commented on issue #27523: avoid tuple assignment because it will circumvent the transient tag

zhengruifeng commented on issue #27523: avoid tuple assignment because it will circumvent the transient tag
URL: https://github.com/apache/spark/pull/27523#issuecomment-584081959
 
 
   I used the following code to check this issue:
   env: Scala 2.13.1 (Java HotSpot(TM) 64-Bit Server VM, Java 1.8.0_241)
   
   ```scala
   import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream}
     
     /** Serialises `value` into a byte array using Java object serialisation.
       *
       * @param value the object to write with `ObjectOutputStream.writeObject`
       * @return the bytes of the resulting object stream
       */
     def serialise(value: Any): Array[Byte] = {
       val stream: ByteArrayOutputStream = new ByteArrayOutputStream()
       val oos = new ObjectOutputStream(stream)
       // Close in `finally` so the stream is released even when writeObject throws
       // (e.g. NotSerializableException). The close must happen before toByteArray
       // so that any data buffered by the ObjectOutputStream reaches `stream`.
       try oos.writeObject(value)
       finally oos.close()
       stream.toByteArray
     }
   
     /** Reads a single object back from a byte array produced by Java object serialisation.
       *
       * @param bytes a serialised object stream
       * @return the first object read from the stream, untyped; callers cast it themselves
       */
     def deserialise(bytes: Array[Byte]): Any = {
       val ois = new ObjectInputStream(new ByteArrayInputStream(bytes))
       // Close in `finally` so the stream is released even when readObject throws
       // (e.g. ClassNotFoundException or a corrupted stream).
       try ois.readObject()
       finally ois.close()
     }
   
   
   
   // Case A (baseline): a single `@transient lazy val`.
   // In the transcript, `a2.a` prints "get a" again after the round trip and
   // `a.a == a2.a` is false, i.e. the value is recomputed on the deserialised copy.
   class A extends Serializable { @transient lazy val a = {println("get a"); System.currentTimeMillis} }
   val a = new A
   // Force the lazy val before serialising.
   a.a
   val a2 = deserialise(serialise(a)).asInstanceOf[A]
   a2.a
   a.a == a2.a
   
   // Case B (the problem): `@transient lazy val` with a tuple pattern `(a, b)`.
   // In the transcript, `b2.a` does NOT print "get a & b" and both comparisons are true,
   // i.e. the computed values survived serialisation despite the `@transient` annotation.
   // NOTE(review): presumably the annotation does not reach the compiler-generated field
   // that holds the tuple -- confirm against the compiler output.
   class B extends Serializable { @transient lazy val (a,b) = {println("get a & b"); val t = System.currentTimeMillis; (t, -t)} }
   val b = new B
   // Force the lazy pattern definition before serialising.
   b.a
   val b2 = deserialise(serialise(b)).asInstanceOf[B]
   b2.a
   b.a == b2.a
   b.b == b2.b
   
   // Case C (the workaround): the tuple is held in its own `@transient lazy val t`,
   // and `a` / `b` are separate `@transient lazy val`s that project `t._1` / `t._2`.
   // In the transcript, `c2.a` prints "get a & b" again and both comparisons are false,
   // i.e. nothing computed before serialisation is carried over.
   class C extends Serializable { @transient lazy val t = {println("get a & b"); val t = System.currentTimeMillis; (t, -t)}; @transient lazy val a = t._1; @transient lazy val b = t._2 }
   val c = new C
   // Force `a` (and therefore `t`) before serialising.
   c.a
   val c2 = deserialise(serialise(c)).asInstanceOf[C]
   c2.a
   c.a == c2.a
   c.b == c2.b
   ```
   
   Result:
   ```scala
   scala> class A extends Serializable { @transient lazy val a = {println("get a"); System.currentTimeMillis} }
   defined class A
   
   scala> val a = new A
   a: A = A@68ef01a5
   
   scala> a.a
   get a
   res0: Long = 1581333143300
   
   scala> val a2 = deserialise(serialise(a)).asInstanceOf[A]
   a2: A = A@f017dd0
   
   scala> a2.a
   get a
   res1: Long = 1581333143523
   
   scala> a.a == a2.a
   res2: Boolean = false
   
   scala> 
   
   scala> class B extends Serializable { @transient lazy val (a,b) = {println("get a & b"); val t = System.currentTimeMillis; (t, -t)} }
   defined class B
   
   scala> val b = new B
   b: B = B@1d008e61
   
   scala> b.a
   get a & b
   res3: Long = 1581333144022
   
   scala> val b2 = deserialise(serialise(b)).asInstanceOf[B]
   b2: B = B@6ab826bb
   
   scala> b2.a
   res4: Long = 1581333144022
   
   scala> b.a == b2.a
   res5: Boolean = true
   
   scala> b.b == b2.b
   res6: Boolean = true
   
   scala> 
   
   scala> class C extends Serializable { @transient lazy val t = {println("get a & b"); val t = System.currentTimeMillis; (t, -t)}; @transient lazy val a = t._1; @transient lazy val b = t._2 }
   defined class C
   
   scala> val c = new C
   c: C = C@7ec01440
   
   scala> c.a
   get a & b
   res7: Long = 1581333144575
   
   scala> val c2 = deserialise(serialise(c)).asInstanceOf[C]
   c2: C = C@42a698bd
   
   scala> c2.a
   get a & b
   res8: Long = 1581333144713
   
   scala> c.a == c2.a
   res9: Boolean = false
   
   scala> c.b == c2.b
   res10: Boolean = false
   ```
   
   We can see that `b2.a` does not trigger the `println`, and the fields `.a` and `.b` contain the same values after serialization, so with tuple assignment the `@transient` annotation is not honoured;
   whereas `a2.a` and `c2.a` do trigger the `println`, and their fields change after serialization, as expected for `@transient` fields.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org