You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ma...@apache.org on 2016/01/04 21:42:01 UTC
spark git commit: [SPARK-12421][SQL] Prevent Internal/External row
from exposing state.
Repository: spark
Updated Branches:
refs/heads/master 40d03960d -> 0171b71e9
[SPARK-12421][SQL] Prevent Internal/External row from exposing state.
It is currently possible to change the values of the supposedly immutable ```GenericRow``` and ```GenericInternalRow``` classes. This is caused by the fact that scala's ArrayOps ```toArray``` (returned by calling ```toSeq```) will return the backing array instead of a copy. This PR fixes this problem.
This PR was inspired by https://github.com/apache/spark/pull/10374 by apo1.
cc apo1 sarutak marmbrus cloud-fan nongli (everyone in the previous conversation).
Author: Herman van Hovell <hv...@questtec.nl>
Closes #10553 from hvanhovell/SPARK-12421.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0171b71e
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0171b71e
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0171b71e
Branch: refs/heads/master
Commit: 0171b71e9511cef512e96a759e407207037f3c49
Parents: 40d0396
Author: Herman van Hovell <hv...@questtec.nl>
Authored: Mon Jan 4 12:41:57 2016 -0800
Committer: Michael Armbrust <mi...@databricks.com>
Committed: Mon Jan 4 12:41:57 2016 -0800
----------------------------------------------------------------------
.../spark/sql/catalyst/expressions/rows.scala | 8 +++---
.../scala/org/apache/spark/sql/RowTest.scala | 30 ++++++++++++++++++++
2 files changed, 34 insertions(+), 4 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/0171b71e/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/rows.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/rows.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/rows.scala
index cfc68fc..814b3c2 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/rows.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/rows.scala
@@ -199,9 +199,9 @@ class GenericRow(protected[sql] val values: Array[Any]) extends Row {
override def get(i: Int): Any = values(i)
- override def toSeq: Seq[Any] = values.toSeq
+ override def toSeq: Seq[Any] = values.clone()
- override def copy(): Row = this
+ override def copy(): GenericRow = this
}
class GenericRowWithSchema(values: Array[Any], override val schema: StructType)
@@ -226,11 +226,11 @@ class GenericInternalRow(private[sql] val values: Array[Any]) extends BaseGeneri
override protected def genericGet(ordinal: Int) = values(ordinal)
- override def toSeq(fieldTypes: Seq[DataType]): Seq[Any] = values
+ override def toSeq(fieldTypes: Seq[DataType]): Seq[Any] = values.clone()
override def numFields: Int = values.length
- override def copy(): InternalRow = new GenericInternalRow(values.clone())
+ override def copy(): GenericInternalRow = this
}
/**
http://git-wip-us.apache.org/repos/asf/spark/blob/0171b71e/sql/catalyst/src/test/scala/org/apache/spark/sql/RowTest.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/RowTest.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/RowTest.scala
index 5c22a72..72624e7 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/RowTest.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/RowTest.scala
@@ -104,4 +104,34 @@ class RowTest extends FunSpec with Matchers {
internalRow shouldEqual internalRow2
}
}
+
+ describe("row immutability") {
+ val values = Seq(1, 2, "3", "IV", 6L)
+ val externalRow = Row.fromSeq(values)
+ val internalRow = InternalRow.fromSeq(values)
+
+ def modifyValues(values: Seq[Any]): Seq[Any] = {
+ val array = values.toArray
+ array(2) = "42"
+ array
+ }
+
+ it("copy should return same ref for external rows") {
+ externalRow should be theSameInstanceAs externalRow.copy()
+ }
+
+ it("copy should return same ref for interal rows") {
+ internalRow should be theSameInstanceAs internalRow.copy()
+ }
+
+ it("toSeq should not expose internal state for external rows") {
+ val modifiedValues = modifyValues(externalRow.toSeq)
+ externalRow.toSeq should not equal modifiedValues
+ }
+
+ it("toSeq should not expose internal state for internal rows") {
+ val modifiedValues = modifyValues(internalRow.toSeq(Seq.empty))
+ internalRow.toSeq(Seq.empty) should not equal modifiedValues
+ }
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org