You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ma...@apache.org on 2013/11/20 01:04:04 UTC

[1/2] git commit: Enable the Broadcast examples to work in a cluster setting

Updated Branches:
  refs/heads/master e2ebc3a9d -> 55925805f


Enable the Broadcast examples to work in a cluster setting

Since they print results with println inside RDD.foreach, which runs on the
executors, we first collect the results to the driver so they display locally.


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/50fd8d98
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/50fd8d98
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/50fd8d98

Branch: refs/heads/master
Commit: 50fd8d98c00f7db6aa34183705c9269098c62486
Parents: e2ebc3a
Author: Aaron Davidson <aa...@databricks.com>
Authored: Mon Nov 18 22:51:35 2013 -0800
Committer: Aaron Davidson <aa...@databricks.com>
Committed: Mon Nov 18 22:51:35 2013 -0800

----------------------------------------------------------------------
 .../org/apache/spark/examples/BroadcastTest.scala    | 10 +++++-----
 .../apache/spark/examples/MultiBroadcastTest.scala   | 15 +++++++++------
 2 files changed, 14 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/50fd8d98/examples/src/main/scala/org/apache/spark/examples/BroadcastTest.scala
----------------------------------------------------------------------
diff --git a/examples/src/main/scala/org/apache/spark/examples/BroadcastTest.scala b/examples/src/main/scala/org/apache/spark/examples/BroadcastTest.scala
index 529709c..a119980 100644
--- a/examples/src/main/scala/org/apache/spark/examples/BroadcastTest.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/BroadcastTest.scala
@@ -32,13 +32,13 @@ object BroadcastTest {
     System.setProperty("spark.broadcast.factory", "org.apache.spark.broadcast." + bcName + "BroadcastFactory")
     System.setProperty("spark.broadcast.blockSize", blockSize)
 
-    val sc = new SparkContext(args(0), "Broadcast Test 2",
+    val sc = new SparkContext(args(0), "Broadcast Test",
       System.getenv("SPARK_HOME"), Seq(System.getenv("SPARK_EXAMPLES_JAR")))
     
     val slices = if (args.length > 1) args(1).toInt else 2
     val num = if (args.length > 2) args(2).toInt else 1000000
 
-    var arr1 = new Array[Int](num)
+    val arr1 = new Array[Int](num)
     for (i <- 0 until arr1.length) {
       arr1(i) = i
     }
@@ -48,9 +48,9 @@ object BroadcastTest {
       println("===========")
       val startTime = System.nanoTime
       val barr1 = sc.broadcast(arr1)
-      sc.parallelize(1 to 10, slices).foreach {
-        i => println(barr1.value.size)
-      }
+      val observedSizes = sc.parallelize(1 to 10, slices).map(_ => barr1.value.size)
+      // Collect the small RDD so we can print the observed sizes locally.
+      observedSizes.collect().foreach(i => println(i))
       println("Iteration %d took %.0f milliseconds".format(i, (System.nanoTime - startTime) / 1E6))
     }
 

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/50fd8d98/examples/src/main/scala/org/apache/spark/examples/MultiBroadcastTest.scala
----------------------------------------------------------------------
diff --git a/examples/src/main/scala/org/apache/spark/examples/MultiBroadcastTest.scala b/examples/src/main/scala/org/apache/spark/examples/MultiBroadcastTest.scala
index f79f014..e1afc29 100644
--- a/examples/src/main/scala/org/apache/spark/examples/MultiBroadcastTest.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/MultiBroadcastTest.scala
@@ -18,35 +18,38 @@
 package org.apache.spark.examples
 
 import org.apache.spark.SparkContext
+import org.apache.spark.rdd.RDD
 
 object MultiBroadcastTest {
   def main(args: Array[String]) {
     if (args.length == 0) {
-      System.err.println("Usage: BroadcastTest <master> [<slices>] [numElem]")
+      System.err.println("Usage: MultiBroadcastTest <master> [<slices>] [numElem]")
       System.exit(1)
     }
 
-    val sc = new SparkContext(args(0), "Broadcast Test",
+    val sc = new SparkContext(args(0), "Multi-Broadcast Test",
       System.getenv("SPARK_HOME"), Seq(System.getenv("SPARK_EXAMPLES_JAR")))
 
     val slices = if (args.length > 1) args(1).toInt else 2
     val num = if (args.length > 2) args(2).toInt else 1000000
 
-    var arr1 = new Array[Int](num)
+    val arr1 = new Array[Int](num)
     for (i <- 0 until arr1.length) {
       arr1(i) = i
     }
 
-    var arr2 = new Array[Int](num)
+    val arr2 = new Array[Int](num)
     for (i <- 0 until arr2.length) {
       arr2(i) = i
     }
 
     val barr1 = sc.broadcast(arr1)
     val barr2 = sc.broadcast(arr2)
-    sc.parallelize(1 to 10, slices).foreach {
-      i => println(barr1.value.size + barr2.value.size)
+    val observedSizes: RDD[(Int, Int)] = sc.parallelize(1 to 10, slices).map { _ =>
+      (barr1.value.size, barr2.value.size)
     }
+    // Collect the small RDD so we can print the observed sizes locally.
+    observedSizes.collect().foreach(i => println(i))
 
     System.exit(0)
   }


[2/2] git commit: Merge pull request #187 from aarondav/example-bcast-test

Posted by ma...@apache.org.
Merge pull request #187 from aarondav/example-bcast-test

Enable the Broadcast examples to work in a cluster setting

Since they print results with println inside RDD.foreach, which runs on the executors, we first collect the results to the driver so that they actually display locally.

This issue came up on the mailing lists [here](http://mail-archives.apache.org/mod_mbox/incubator-spark-user/201311.mbox/%3C2013111909591557147628%40ict.ac.cn%3E).


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/55925805
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/55925805
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/55925805

Branch: refs/heads/master
Commit: 55925805fcef6ca782e1f42848a20133865b9412
Parents: e2ebc3a 50fd8d9
Author: Matei Zaharia <ma...@eecs.berkeley.edu>
Authored: Tue Nov 19 16:04:01 2013 -0800
Committer: Matei Zaharia <ma...@eecs.berkeley.edu>
Committed: Tue Nov 19 16:04:01 2013 -0800

----------------------------------------------------------------------
 .../org/apache/spark/examples/BroadcastTest.scala    | 10 +++++-----
 .../apache/spark/examples/MultiBroadcastTest.scala   | 15 +++++++++------
 2 files changed, 14 insertions(+), 11 deletions(-)
----------------------------------------------------------------------