Posted to commits@spark.apache.org by sr...@apache.org on 2016/02/24 14:35:42 UTC

spark git commit: [SPARK-13390][SQL][BRANCH-1.6] Fix the issue that Iterator.map().toSeq is not Serializable

Repository: spark
Updated Branches:
  refs/heads/branch-1.6 573a2c97e -> 06f4fce29


[SPARK-13390][SQL][BRANCH-1.6] Fix the issue that Iterator.map().toSeq is not Serializable

## What changes were proposed in this pull request?

`scala.collection.Iterator`'s transformation methods (e.g., `map`, `filter`) return an `AbstractIterator`, which is not `Serializable`. For example:
```Scala
scala> val iter = Array(1, 2, 3).iterator.map(_ + 1)
iter: Iterator[Int] = non-empty iterator

scala> println(iter.isInstanceOf[Serializable])
false
```
If we call something like `Iterator.map(...).toSeq`, it creates a `Stream` that holds a reference to the non-serializable `AbstractIterator`, which makes the `Stream` itself non-serializable.
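
For instance, here is a minimal standalone sketch (not part of the patch; the object name is illustrative) that reproduces the failure with plain Java serialization, assuming Scala 2.10/2.11 collection behavior:
```Scala
import java.io.{ByteArrayOutputStream, ObjectOutputStream}

object StreamSerializationRepro {
  def main(args: Array[String]): Unit = {
    // toSeq on an Iterator returns a lazy Stream; its unevaluated tail
    // is a closure that captures the non-serializable AbstractIterator.
    val seq = Array(1, 2, 3).iterator.map(_ + 1).toSeq

    // Java serialization walks into the captured iterator and throws
    // java.io.NotSerializableException.
    new ObjectOutputStream(new ByteArrayOutputStream()).writeObject(seq)
  }
}
```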

This PR uses `toArray` instead of `toSeq` to fix this issue in `def createDataFrame(data: java.util.List[_], beanClass: Class[_]): DataFrame`.
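
By contrast, `toArray` materializes the elements eagerly, so the result keeps no reference to the iterator. A minimal sketch of the same check (standalone, not from the patch; the object name is illustrative):
```Scala
import java.io.{ByteArrayOutputStream, ObjectOutputStream}

object ToArrayFixSketch {
  def main(args: Array[String]): Unit = {
    // toArray drains the iterator up front; the resulting Array[Int]
    // holds only the computed values, so Java serialization succeeds.
    val arr = Array(1, 2, 3).iterator.map(_ + 1).toArray
    new ObjectOutputStream(new ByteArrayOutputStream()).writeObject(arr)
  }
}
```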

## How was this patch tested?

Jenkins tests.

Author: Shixiong Zhu <sh...@databricks.com>

Closes #11334 from zsxwing/SPARK-13390.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/06f4fce2
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/06f4fce2
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/06f4fce2

Branch: refs/heads/branch-1.6
Commit: 06f4fce29227f9763d9f9abff6e7459542dce261
Parents: 573a2c9
Author: Shixiong Zhu <sh...@databricks.com>
Authored: Wed Feb 24 13:35:36 2016 +0000
Committer: Sean Owen <so...@cloudera.com>
Committed: Wed Feb 24 13:35:36 2016 +0000

----------------------------------------------------------------------
 .../scala/org/apache/spark/sql/SQLContext.scala    |  2 +-
 .../org/apache/spark/sql/SQLContextSuite.scala     | 17 +++++++++++++++++
 2 files changed, 18 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/06f4fce2/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
index 4e26250..47fd7fc 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
@@ -604,7 +604,7 @@ class SQLContext private[sql](
     val className = beanClass.getName
     val beanInfo = Introspector.getBeanInfo(beanClass)
     val rows = SQLContext.beansToRows(data.asScala.iterator, beanInfo, attrSeq)
-    DataFrame(self, LocalRelation(attrSeq, rows.toSeq))
+    DataFrame(self, LocalRelation(attrSeq, rows.toArray))
   }
 
 

http://git-wip-us.apache.org/repos/asf/spark/blob/06f4fce2/sql/core/src/test/scala/org/apache/spark/sql/SQLContextSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLContextSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLContextSuite.scala
index 1994dac..9bf865d 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLContextSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLContextSuite.scala
@@ -65,4 +65,21 @@ class SQLContextSuite extends SparkFunSuite with SharedSparkContext{
       session2.sql("select myadd(1, 2)").explain()
     }
   }
+
+  test("SPARK-13390: createDataFrame(java.util.List[_],Class[_]) NotSerializableException") {
+    val rows = new java.util.ArrayList[IntJavaBean]()
+    rows.add(new IntJavaBean(1))
+    val sqlContext = SQLContext.getOrCreate(sc)
+    // Without the fix for SPARK-13390, this will throw NotSerializableException
+    sqlContext.createDataFrame(rows, classOf[IntJavaBean]).groupBy("int").count().collect()
+  }
+}
+
+class IntJavaBean(private var i: Int) extends Serializable {
+
+  def getInt(): Int = i
+
+  def setInt(i: Int): Unit = {
+    this.i = i
+  }
 }

