You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ma...@apache.org on 2016/01/04 21:42:01 UTC

spark git commit: [SPARK-12421][SQL] Prevent Internal/External row from exposing state.

Repository: spark
Updated Branches:
  refs/heads/master 40d03960d -> 0171b71e9


[SPARK-12421][SQL] Prevent Internal/External row from exposing state.

It is currently possible to change the values of the supposedly immutable ```GenericRow``` and ```GenericInternalRow``` classes. This is caused by the fact that scala's ArrayOps ```toArray``` (returned by calling ```toSeq```) will return the backing array instead of a copy. This PR fixes this problem.

This PR was inspired by https://github.com/apache/spark/pull/10374 by apo1.

cc apo1 sarutak marmbrus cloud-fan nongli (everyone in the previous conversation).

Author: Herman van Hovell <hv...@questtec.nl>

Closes #10553 from hvanhovell/SPARK-12421.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0171b71e
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0171b71e
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0171b71e

Branch: refs/heads/master
Commit: 0171b71e9511cef512e96a759e407207037f3c49
Parents: 40d0396
Author: Herman van Hovell <hv...@questtec.nl>
Authored: Mon Jan 4 12:41:57 2016 -0800
Committer: Michael Armbrust <mi...@databricks.com>
Committed: Mon Jan 4 12:41:57 2016 -0800

----------------------------------------------------------------------
 .../spark/sql/catalyst/expressions/rows.scala   |  8 +++---
 .../scala/org/apache/spark/sql/RowTest.scala    | 30 ++++++++++++++++++++
 2 files changed, 34 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/0171b71e/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/rows.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/rows.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/rows.scala
index cfc68fc..814b3c2 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/rows.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/rows.scala
@@ -199,9 +199,9 @@ class GenericRow(protected[sql] val values: Array[Any]) extends Row {
 
   override def get(i: Int): Any = values(i)
 
-  override def toSeq: Seq[Any] = values.toSeq
+  override def toSeq: Seq[Any] = values.clone()
 
-  override def copy(): Row = this
+  override def copy(): GenericRow = this
 }
 
 class GenericRowWithSchema(values: Array[Any], override val schema: StructType)
@@ -226,11 +226,11 @@ class GenericInternalRow(private[sql] val values: Array[Any]) extends BaseGeneri
 
   override protected def genericGet(ordinal: Int) = values(ordinal)
 
-  override def toSeq(fieldTypes: Seq[DataType]): Seq[Any] = values
+  override def toSeq(fieldTypes: Seq[DataType]): Seq[Any] = values.clone()
 
   override def numFields: Int = values.length
 
-  override def copy(): InternalRow = new GenericInternalRow(values.clone())
+  override def copy(): GenericInternalRow = this
 }
 
 /**

http://git-wip-us.apache.org/repos/asf/spark/blob/0171b71e/sql/catalyst/src/test/scala/org/apache/spark/sql/RowTest.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/RowTest.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/RowTest.scala
index 5c22a72..72624e7 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/RowTest.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/RowTest.scala
@@ -104,4 +104,34 @@ class RowTest extends FunSpec with Matchers {
       internalRow shouldEqual internalRow2
     }
   }
+
+  describe("row immutability") {
+    val values = Seq(1, 2, "3", "IV", 6L)
+    val externalRow = Row.fromSeq(values)
+    val internalRow = InternalRow.fromSeq(values)
+
+    def modifyValues(values: Seq[Any]): Seq[Any] = {
+      val array = values.toArray
+      array(2) = "42"
+      array
+    }
+
+    it("copy should return same ref for external rows") {
+      externalRow should be theSameInstanceAs externalRow.copy()
+    }
+
+    it("copy should return same ref for interal rows") {
+      internalRow should be theSameInstanceAs internalRow.copy()
+    }
+
+    it("toSeq should not expose internal state for external rows") {
+      val modifiedValues = modifyValues(externalRow.toSeq)
+      externalRow.toSeq should not equal modifiedValues
+    }
+
+    it("toSeq should not expose internal state for internal rows") {
+      val modifiedValues = modifyValues(internalRow.toSeq(Seq.empty))
+      internalRow.toSeq(Seq.empty) should not equal modifiedValues
+    }
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org