Posted to commits@spark.apache.org by pw...@apache.org on 2013/12/12 08:11:32 UTC

[01/50] git commit: Fix small bug in web UI and minor clean-up.

Updated Branches:
  refs/heads/scala-2.10 5429d62df -> 2e89398e4


Fix small bug in web UI and minor clean-up.

There was a bug where the sort order didn't work correctly for the write time metric.

I also cleaned up some earlier code that fixed the same issue for shuffle read and
write bytes.
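
For context, the fix also sidesteps a subtle Scala pitfall: calling toString() on an
Option stringifies the Option itself ("Some(123)" or "None") rather than the value,
which produces a useless sort key. A minimal sketch of the difference (illustrative,
not the actual StagePage code):

    // Illustrative sketch of the Option pitfall (not the actual StagePage code).
    val maybeBytes: Option[Long] = Some(123L)
    val wrongKey = maybeBytes.toString                      // "Some(123)" -- stringifies the Option
    val rightKey = maybeBytes.map(_.toString).getOrElse("") // "123" -- usable as a numeric sort key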


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/380b90b9
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/380b90b9
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/380b90b9

Branch: refs/heads/scala-2.10
Commit: 380b90b9b360db9cb6a4edc1312704afe11eb31d
Parents: d6e5473
Author: Patrick Wendell <pw...@gmail.com>
Authored: Wed Dec 4 14:41:48 2013 -0800
Committer: Patrick Wendell <pw...@gmail.com>
Committed: Wed Dec 4 14:41:48 2013 -0800

----------------------------------------------------------------------
 .../org/apache/spark/ui/jobs/StagePage.scala    | 29 +++++++++-----------
 1 file changed, 13 insertions(+), 16 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/380b90b9/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala b/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala
index fbd8228..baccc42 100644
--- a/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala
+++ b/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala
@@ -152,21 +152,18 @@ private[spark] class StagePage(parent: JobProgressUI) {
       else metrics.map(m => parent.formatDuration(m.executorRunTime)).getOrElse("")
     val gcTime = metrics.map(m => m.jvmGCTime).getOrElse(0L)
 
-    var shuffleReadSortable: String = ""
-    var shuffleReadReadable: String = ""
-    if (shuffleRead) {
-      shuffleReadSortable = metrics.flatMap{m => m.shuffleReadMetrics}.map{s => s.remoteBytesRead}.toString()
-      shuffleReadReadable = metrics.flatMap{m => m.shuffleReadMetrics}.map{s =>
-        Utils.bytesToString(s.remoteBytesRead)}.getOrElse("")
-    }
+    val maybeShuffleRead = metrics.flatMap{m => m.shuffleReadMetrics}.map{s => s.remoteBytesRead}
+    val shuffleReadSortable = maybeShuffleRead.map(_.toString).getOrElse("")
+    val shuffleReadReadable = maybeShuffleRead.map{Utils.bytesToString(_)}.getOrElse("")
 
-    var shuffleWriteSortable: String = ""
-    var shuffleWriteReadable: String = ""
-    if (shuffleWrite) {
-      shuffleWriteSortable = metrics.flatMap{m => m.shuffleWriteMetrics}.map{s => s.shuffleBytesWritten}.toString()
-      shuffleWriteReadable = metrics.flatMap{m => m.shuffleWriteMetrics}.map{s =>
-        Utils.bytesToString(s.shuffleBytesWritten)}.getOrElse("")
-    }
+    val maybeShuffleWrite = metrics.flatMap{m => m.shuffleWriteMetrics}.map{s => s.shuffleBytesWritten}
+    val shuffleWriteSortable = maybeShuffleWrite.map(_.toString).getOrElse("")
+    val shuffleWriteReadable = maybeShuffleWrite.map{Utils.bytesToString(_)}.getOrElse("")
+
+    val maybeWriteTime = metrics.flatMap{m => m.shuffleWriteMetrics}.map{s => s.shuffleWriteTime}
+    val writeTimeSortable = maybeWriteTime.map(_.toString).getOrElse("")
+    val writeTimeReadable = maybeWriteTime.map{ t => t / (1000 * 1000)}.map{ ms =>
+      if (ms == 0) "" else parent.formatDuration(ms)}.getOrElse("")
 
     <tr>
       <td>{info.index}</td>
@@ -187,8 +184,8 @@ private[spark] class StagePage(parent: JobProgressUI) {
          </td>
       }}
       {if (shuffleWrite) {
-         <td>{metrics.flatMap{m => m.shuffleWriteMetrics}.map{s =>
-           parent.formatDuration(s.shuffleWriteTime / (1000 * 1000))}.getOrElse("")}
+         <td sorttable_customkey={writeTimeSortable}>
+           {writeTimeReadable}
          </td>
          <td sorttable_customkey={shuffleWriteSortable}>
            {shuffleWriteReadable}


[28/50] git commit: Minor formatting fix in config file

Posted by pw...@apache.org.
Minor formatting fix in config file


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/7a1d1c93
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/7a1d1c93
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/7a1d1c93

Branch: refs/heads/scala-2.10
Commit: 7a1d1c93b824a5b38c4ddeb16900dc6173482c83
Parents: 1b38f5f
Author: Patrick Wendell <pw...@gmail.com>
Authored: Fri Dec 6 20:28:22 2013 -0800
Committer: Patrick Wendell <pw...@gmail.com>
Committed: Fri Dec 6 20:28:22 2013 -0800

----------------------------------------------------------------------
 docs/configuration.md | 1 -
 1 file changed, 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/7a1d1c93/docs/configuration.md
----------------------------------------------------------------------
diff --git a/docs/configuration.md b/docs/configuration.md
index 65b41b4..e86b9ea 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -335,7 +335,6 @@ Apart from these, the following properties are also available, and may be useful
   </td>
 </tr>
 <tr>
-<tr>
   <td>spark.speculation</td>
   <td>false</td>
   <td>


[46/50] git commit: Added Eclipse repository for Spark Streaming.

Posted by pw...@apache.org.
Added Eclipse repository for Spark Streaming.


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/0b82b5af
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/0b82b5af
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/0b82b5af

Branch: refs/heads/scala-2.10
Commit: 0b82b5af1e9882268734081253a431973bc128cf
Parents: 17db6a9
Author: Prashant Sharma <sc...@gmail.com>
Authored: Wed Dec 11 08:15:44 2013 +0530
Committer: Prashant Sharma <sc...@gmail.com>
Committed: Wed Dec 11 08:17:02 2013 +0530

----------------------------------------------------------------------
 project/SparkBuild.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/0b82b5af/project/SparkBuild.scala
----------------------------------------------------------------------
diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala
index 3584e88..e708aee 100644
--- a/project/SparkBuild.scala
+++ b/project/SparkBuild.scala
@@ -281,7 +281,7 @@ object SparkBuild extends Build {
   def streamingSettings = sharedSettings ++ Seq(
     name := "spark-streaming",
     resolvers ++= Seq(
-      "Akka Repository" at "http://repo.akka.io/releases/",
+      "Eclipse Repository" at "https://repo.eclipse.org/content/repositories/paho-releases/",
       "Apache repo" at "https://repository.apache.org/content/repositories/releases"
     ),
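
This resolver supplies the Eclipse Paho MQTT client that the streaming build depends on
(the Maven build lists the same URL as mqtt-repo). A minimal sbt sketch of the idiom,
reusing the mqtt-client coordinates that appear elsewhere in this thread:

    // sbt sketch (illustrative): declare the resolver, then the dependency it serves.
    resolvers += "Eclipse Repository" at "https://repo.eclipse.org/content/repositories/paho-releases/"
    libraryDependencies += "org.eclipse.paho" % "mqtt-client" % "0.4.0"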
 


[26/50] git commit: Merge pull request #235 from pwendell/master

Posted by pw...@apache.org.
Merge pull request #235 from pwendell/master

Minor doc fixes and updating README


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/e5d5728b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/e5d5728b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/e5d5728b

Branch: refs/heads/scala-2.10
Commit: e5d5728b72e58046cc175ab06b5f1c7be4957711
Parents: 241336a bb6e25c
Author: Patrick Wendell <pw...@gmail.com>
Authored: Fri Dec 6 20:14:56 2013 -0800
Committer: Patrick Wendell <pw...@gmail.com>
Committed: Fri Dec 6 20:14:56 2013 -0800

----------------------------------------------------------------------
 README.md                   | 7 ++++++-
 docs/building-with-maven.md | 4 +++-
 docs/index.md               | 2 +-
 3 files changed, 10 insertions(+), 3 deletions(-)
----------------------------------------------------------------------



[48/50] git commit: Merge branch 'akka-bug-fix' of github.com:ScrapCodes/incubator-spark into akka-bug-fix

Posted by pw...@apache.org.
Merge branch 'akka-bug-fix' of github.com:ScrapCodes/incubator-spark into akka-bug-fix


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/f4c73df5
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/f4c73df5
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/f4c73df5

Branch: refs/heads/scala-2.10
Commit: f4c73df5c981476c4ca431613d8b4a827eddf653
Parents: 603af51 0b82b5a
Author: Prashant Sharma <pr...@imaginea.com>
Authored: Wed Dec 11 10:22:44 2013 +0530
Committer: Prashant Sharma <pr...@imaginea.com>
Committed: Wed Dec 11 10:22:44 2013 +0530

----------------------------------------------------------------------
 project/SparkBuild.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/f4c73df5/project/SparkBuild.scala
----------------------------------------------------------------------


[29/50] git commit: Merge pull request #237 from pwendell/formatting-fix

Posted by pw...@apache.org.
Merge pull request #237 from pwendell/formatting-fix

Formatting fix

This is a single-line change. The diff appears larger here due to GitHub being out of sync.


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/10c3c0c6
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/10c3c0c6
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/10c3c0c6

Branch: refs/heads/scala-2.10
Commit: 10c3c0c6524d0cf6c59b6f2227bf316cdeb7d06c
Parents: 1b38f5f 7a1d1c9
Author: Patrick Wendell <pw...@gmail.com>
Authored: Fri Dec 6 20:29:45 2013 -0800
Committer: Patrick Wendell <pw...@gmail.com>
Committed: Fri Dec 6 20:29:45 2013 -0800

----------------------------------------------------------------------
 docs/configuration.md | 1 -
 1 file changed, 1 deletion(-)
----------------------------------------------------------------------



[30/50] git commit: Incorporated Patrick's feedback comment on #211 and made the Maven build/dependency resolution at least a bit faster.

Posted by pw...@apache.org.
Incorporated Patrick's feedback comment on #211 and made the Maven build/dependency resolution at least a bit faster.


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/7ad6921a
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/7ad6921a
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/7ad6921a

Branch: refs/heads/scala-2.10
Commit: 7ad6921ae0657ca806704f859d5b8c9ff26633e4
Parents: 94b5881
Author: Prashant Sharma <sc...@gmail.com>
Authored: Sat Dec 7 12:45:57 2013 +0530
Committer: Prashant Sharma <sc...@gmail.com>
Committed: Sat Dec 7 12:45:57 2013 +0530

----------------------------------------------------------------------
 assembly/pom.xml  |  2 +-
 bagel/pom.xml     |  2 +-
 core/pom.xml      |  2 +-
 examples/pom.xml  |  2 +-
 mllib/pom.xml     |  2 +-
 pom.xml           | 56 +++++---------------------------------------------
 repl-bin/pom.xml  |  2 +-
 repl/pom.xml      |  2 +-
 streaming/pom.xml |  2 +-
 tools/pom.xml     |  2 +-
 yarn/pom.xml      |  2 +-
 11 files changed, 15 insertions(+), 61 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/7ad6921a/assembly/pom.xml
----------------------------------------------------------------------
diff --git a/assembly/pom.xml b/assembly/pom.xml
index 28b0692..8103534 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -26,7 +26,7 @@
   </parent>
 
   <groupId>org.apache.spark</groupId>
-  <artifactId>spark-assembly_${scala-short.version}</artifactId>
+  <artifactId>spark-assembly_2.10</artifactId>
   <name>Spark Project Assembly</name>
   <url>http://spark.incubator.apache.org/</url>
 

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/7ad6921a/bagel/pom.xml
----------------------------------------------------------------------
diff --git a/bagel/pom.xml b/bagel/pom.xml
index c8b9c4f..461e76a 100644
--- a/bagel/pom.xml
+++ b/bagel/pom.xml
@@ -26,7 +26,7 @@
   </parent>
 
   <groupId>org.apache.spark</groupId>
-  <artifactId>spark-bagel_${scala-short.version}</artifactId>
+  <artifactId>spark-bagel_2.10</artifactId>
   <packaging>jar</packaging>
   <name>Spark Project Bagel</name>
   <url>http://spark.incubator.apache.org/</url>

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/7ad6921a/core/pom.xml
----------------------------------------------------------------------
diff --git a/core/pom.xml b/core/pom.xml
index e2033c9..af605e1 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -26,7 +26,7 @@
   </parent>
 
   <groupId>org.apache.spark</groupId>
-  <artifactId>spark-core_${scala-short.version}</artifactId>
+  <artifactId>spark-core_2.10</artifactId>
   <packaging>jar</packaging>
   <name>Spark Project Core</name>
   <url>http://spark.incubator.apache.org/</url>

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/7ad6921a/examples/pom.xml
----------------------------------------------------------------------
diff --git a/examples/pom.xml b/examples/pom.xml
index a10dee7..464ad82 100644
--- a/examples/pom.xml
+++ b/examples/pom.xml
@@ -26,7 +26,7 @@
   </parent>
 
   <groupId>org.apache.spark</groupId>
-  <artifactId>spark-examples_${scala-short.version}</artifactId>
+  <artifactId>spark-examples_2.10</artifactId>
   <packaging>jar</packaging>
   <name>Spark Project Examples</name>
   <url>http://spark.incubator.apache.org/</url>

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/7ad6921a/mllib/pom.xml
----------------------------------------------------------------------
diff --git a/mllib/pom.xml b/mllib/pom.xml
index a57bdde..fce5b19 100644
--- a/mllib/pom.xml
+++ b/mllib/pom.xml
@@ -26,7 +26,7 @@
   </parent>
 
   <groupId>org.apache.spark</groupId>
-  <artifactId>spark-mllib_${scala-short.version}</artifactId>
+  <artifactId>spark-mllib_2.10</artifactId>
   <packaging>jar</packaging>
   <name>Spark Project ML Library</name>
   <url>http://spark.incubator.apache.org/</url>

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/7ad6921a/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 8700a48..4c11459 100644
--- a/pom.xml
+++ b/pom.xml
@@ -104,7 +104,7 @@
     <scala-short.version>2.10</scala-short.version>
     <scala.version>2.10.3</scala.version>
     <mesos.version>0.13.0</mesos.version>
-    <akka.version>2.2.1</akka.version>
+    <akka.version>2.2.3</akka.version>
     <slf4j.version>1.7.2</slf4j.version>
     <log4j.version>1.2.17</log4j.version>
     <hadoop.version>1.0.4</hadoop.version>
@@ -116,10 +116,10 @@
   </properties>
 
   <repositories>
-    <repository>
-      <id>typesafe-repo</id>
-      <name>Typesafe Repository</name>
-      <url>http://repo.typesafe.com/typesafe/releases/</url>
+    <repository> 
+      <id>maven-repo</id> <!-- This should be at top, it makes maven try the central repo first and then others and hence faster dep resolution -->
+      <name>Maven Repository</name>
+      <url>http://repo.maven.apache.org/maven2/</url>
       <releases>
         <enabled>true</enabled>
       </releases>
@@ -139,17 +139,6 @@
       </snapshots>
     </repository>
     <repository>
-      <id>akka-repo</id>
-      <name>Akka Repository</name>
-      <url>http://repo.akka.io/releases/</url>
-      <releases>
-        <enabled>true</enabled>
-      </releases>
-      <snapshots>
-        <enabled>false</enabled>
-      </snapshots>
-    </repository>
-    <repository>
       <id>mqtt-repo</id>
       <name>MQTT Repository</name>
       <url>https://repo.eclipse.org/content/repositories/paho-releases/</url>
@@ -161,41 +150,6 @@
       </snapshots>
     </repository>
   </repositories>
-  <pluginRepositories>
-    <pluginRepository>
-      <id>oss-sonatype-releases</id>
-      <name>OSS Sonatype</name>
-      <url>https://oss.sonatype.org/content/repositories/releases</url>
-      <releases>
-        <enabled>true</enabled>
-      </releases>
-      <snapshots>
-        <enabled>false</enabled>
-      </snapshots>
-    </pluginRepository>
-    <pluginRepository>
-      <id>oss-sonatype-snapshots</id>
-      <name>OSS Sonatype</name>
-      <url>https://oss.sonatype.org/content/repositories/snapshots</url>
-      <releases>
-        <enabled>false</enabled>
-      </releases>
-      <snapshots>
-        <enabled>true</enabled>
-      </snapshots>
-    </pluginRepository>
-    <pluginRepository>
-      <id>oss-sonatype</id>
-      <name>OSS Sonatype</name>
-      <url>https://oss.sonatype.org/content/groups/public</url>
-      <releases>
-        <enabled>true</enabled>
-      </releases>
-      <snapshots>
-        <enabled>true</enabled>
-      </snapshots>
-    </pluginRepository>
-  </pluginRepositories>
 
   <dependencyManagement>
     <dependencies>

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/7ad6921a/repl-bin/pom.xml
----------------------------------------------------------------------
diff --git a/repl-bin/pom.xml b/repl-bin/pom.xml
index c983ea5..f8a17d9 100644
--- a/repl-bin/pom.xml
+++ b/repl-bin/pom.xml
@@ -26,7 +26,7 @@
   </parent>
 
   <groupId>org.apache.spark</groupId>
-  <artifactId>spark-repl-bin_${scala-short.version}</artifactId>
+  <artifactId>spark-repl-bin_2.10</artifactId>
   <packaging>pom</packaging>
   <name>Spark Project REPL binary packaging</name>
   <url>http://spark.incubator.apache.org/</url>

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/7ad6921a/repl/pom.xml
----------------------------------------------------------------------
diff --git a/repl/pom.xml b/repl/pom.xml
index ff66493..2f27e76 100644
--- a/repl/pom.xml
+++ b/repl/pom.xml
@@ -26,7 +26,7 @@
   </parent>
 
   <groupId>org.apache.spark</groupId>
-  <artifactId>spark-repl_${scala-short.version}</artifactId>
+  <artifactId>spark-repl_2.10</artifactId>
   <packaging>jar</packaging>
   <name>Spark Project REPL</name>
   <url>http://spark.incubator.apache.org/</url>

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/7ad6921a/streaming/pom.xml
----------------------------------------------------------------------
diff --git a/streaming/pom.xml b/streaming/pom.xml
index fb15681..ff95591 100644
--- a/streaming/pom.xml
+++ b/streaming/pom.xml
@@ -26,7 +26,7 @@
   </parent>
 
   <groupId>org.apache.spark</groupId>
-  <artifactId>spark-streaming_${scala-short.version}</artifactId>
+  <artifactId>spark-streaming_2.10</artifactId>
   <packaging>jar</packaging>
   <name>Spark Project Streaming</name>
   <url>http://spark.incubator.apache.org/</url>

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/7ad6921a/tools/pom.xml
----------------------------------------------------------------------
diff --git a/tools/pom.xml b/tools/pom.xml
index db87b54..353d201 100644
--- a/tools/pom.xml
+++ b/tools/pom.xml
@@ -25,7 +25,7 @@
   </parent>
 
   <groupId>org.apache.spark</groupId>
-  <artifactId>spark-tools_${scala-short.version}</artifactId>
+  <artifactId>spark-tools_2.10</artifactId>
   <packaging>jar</packaging>
   <name>Spark Project Tools</name>
   <url>http://spark.incubator.apache.org/</url>

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/7ad6921a/yarn/pom.xml
----------------------------------------------------------------------
diff --git a/yarn/pom.xml b/yarn/pom.xml
index 12bc97d..5cf8157 100644
--- a/yarn/pom.xml
+++ b/yarn/pom.xml
@@ -25,7 +25,7 @@
   </parent>
 
   <groupId>org.apache.spark</groupId>
-  <artifactId>spark-yarn_${scala-short.version}</artifactId>
+  <artifactId>spark-yarn_2.10</artifactId>
   <packaging>jar</packaging>
   <name>Spark Project YARN Support</name>
   <url>http://spark.incubator.apache.org/</url>


[38/50] git commit: Merge pull request #242 from pwendell/master

Posted by pw...@apache.org.
Merge pull request #242 from pwendell/master

Update broken links and add HDP 2.0 version string

I ran a link checker on the UI and found several broken links.


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/1f4a4bcc
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/1f4a4bcc
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/1f4a4bcc

Branch: refs/heads/scala-2.10
Commit: 1f4a4bccf3cf7376c634bad2ebadfdd4c6f78195
Parents: 6494d62 0428145
Author: Patrick Wendell <pw...@gmail.com>
Authored: Sat Dec 7 22:34:34 2013 -0800
Committer: Patrick Wendell <pw...@gmail.com>
Committed: Sat Dec 7 22:34:34 2013 -0800

----------------------------------------------------------------------
 docs/bagel-programming-guide.md          | 2 +-
 docs/hadoop-third-party-distributions.md | 3 ++-
 docs/index.md                            | 2 +-
 docs/job-scheduling.md                   | 2 +-
 docs/running-on-yarn.md                  | 4 ++--
 docs/streaming-programming-guide.md      | 8 ++++----
 6 files changed, 11 insertions(+), 10 deletions(-)
----------------------------------------------------------------------



[14/50] git commit: Merge branch 'wip-scala-2.10' into akka-bug-fix

Posted by pw...@apache.org.
Merge branch 'wip-scala-2.10' into akka-bug-fix


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/c9cd2af7
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/c9cd2af7
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/c9cd2af7

Branch: refs/heads/scala-2.10
Commit: c9cd2af71eae4126536db790ceaffe0587da7d89
Parents: 09e8be9 4e70480
Author: Prashant Sharma <pr...@imaginea.com>
Authored: Fri Dec 6 13:32:15 2013 +0530
Committer: Prashant Sharma <pr...@imaginea.com>
Committed: Fri Dec 6 13:32:15 2013 +0530

----------------------------------------------------------------------
 .../org/apache/spark/scheduler/cluster/SimrSchedulerBackend.scala  | 2 +-
 .../main/scala/org/apache/spark/deploy/yarn/WorkerLauncher.scala   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)
----------------------------------------------------------------------



[07/50] git commit: Forcing shuffle consolidation in DiskBlockManagerSuite

Posted by pw...@apache.org.
Forcing shuffle consolidation in DiskBlockManagerSuite


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/75d161b3
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/75d161b3
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/75d161b3

Branch: refs/heads/scala-2.10
Commit: 75d161b35702b6790aa66fff06b07f306442f5a3
Parents: 1450b8e
Author: Patrick Wendell <pw...@gmail.com>
Authored: Thu Dec 5 11:36:16 2013 -0800
Committer: Patrick Wendell <pw...@gmail.com>
Committed: Thu Dec 5 11:36:41 2013 -0800

----------------------------------------------------------------------
 .../apache/spark/storage/DiskBlockManagerSuite.scala  | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/75d161b3/core/src/test/scala/org/apache/spark/storage/DiskBlockManagerSuite.scala
----------------------------------------------------------------------
diff --git a/core/src/test/scala/org/apache/spark/storage/DiskBlockManagerSuite.scala b/core/src/test/scala/org/apache/spark/storage/DiskBlockManagerSuite.scala
index 0b90563..ef4c4c0 100644
--- a/core/src/test/scala/org/apache/spark/storage/DiskBlockManagerSuite.scala
+++ b/core/src/test/scala/org/apache/spark/storage/DiskBlockManagerSuite.scala
@@ -5,9 +5,9 @@ import java.io.{FileWriter, File}
 import scala.collection.mutable
 
 import com.google.common.io.Files
-import org.scalatest.{BeforeAndAfterEach, FunSuite}
+import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach, FunSuite}
 
-class DiskBlockManagerSuite extends FunSuite with BeforeAndAfterEach {
+class DiskBlockManagerSuite extends FunSuite with BeforeAndAfterEach with BeforeAndAfterAll {
 
   val rootDir0 = Files.createTempDir()
   rootDir0.deleteOnExit()
@@ -16,6 +16,12 @@ class DiskBlockManagerSuite extends FunSuite with BeforeAndAfterEach {
   val rootDirs = rootDir0.getName + "," + rootDir1.getName
   println("Created root dirs: " + rootDirs)
 
+  // This suite focuses primarily on consolidation features,
+  // so we coerce consolidation if not already enabled.
+  val consolidateProp = "spark.shuffle.consolidateFiles"
+  val oldConsolidate = Option(System.getProperty(consolidateProp))
+  System.setProperty(consolidateProp, "true")
+
   val shuffleBlockManager = new ShuffleBlockManager(null) {
     var idToSegmentMap = mutable.Map[ShuffleBlockId, FileSegment]()
     override def getBlockLocation(id: ShuffleBlockId) = idToSegmentMap(id)
@@ -23,6 +29,10 @@ class DiskBlockManagerSuite extends FunSuite with BeforeAndAfterEach {
 
   var diskBlockManager: DiskBlockManager = _
 
+  override def afterAll() {
+    oldConsolidate.map(c => System.setProperty(consolidateProp, c))
+  }
+
   override def beforeEach() {
     diskBlockManager = new DiskBlockManager(shuffleBlockManager, rootDirs)
     shuffleBlockManager.idToSegmentMap.clear()
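
The save-and-restore around spark.shuffle.consolidateFiles generalizes to any system
property a test needs to pin. A hypothetical helper (not part of this commit) that
captures the pattern:

    // Hypothetical helper, not in this commit: pin a system property for the
    // duration of a block, then restore (or clear) the previous value.
    def withSystemProperty[T](key: String, value: String)(body: => T): T = {
      val old = Option(System.getProperty(key))
      System.setProperty(key, value)
      try body finally {
        old match {
          case Some(v) => System.setProperty(key, v)
          case None    => System.clearProperty(key)
        }
      }
    }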


[44/50] git commit: README incorrectly suggests the build sources spark-env.sh

Posted by pw...@apache.org.
README incorrectly suggests the build sources spark-env.sh

This is misleading because the build doesn't source that file. IMO it's better to
force people to always specify build environment variables on the command line,
like we do in every example.


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/153cad12
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/153cad12
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/153cad12

Branch: refs/heads/scala-2.10
Commit: 153cad1293efb7947f5c3d01c7209b5b035e63c6
Parents: 5b74609
Author: Patrick Wendell <pw...@gmail.com>
Authored: Tue Dec 10 12:53:45 2013 -0800
Committer: Patrick Wendell <pw...@gmail.com>
Committed: Tue Dec 10 12:54:28 2013 -0800

----------------------------------------------------------------------
 README.md | 3 ---
 1 file changed, 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/153cad12/README.md
----------------------------------------------------------------------
diff --git a/README.md b/README.md
index 8c7853e..7faba27 100644
--- a/README.md
+++ b/README.md
@@ -69,9 +69,6 @@ When building for Hadoop 2.2.X and newer, you'll need to include the additional
     # Apache Hadoop 2.2.X and newer
     $ mvn -Dyarn.version=2.2.0 -Dhadoop.version=2.2.0 -Pnew-yarn
 
-For convenience, these variables may also be set through the `conf/spark-env.sh` file
-described below.
-
 When developing a Spark application, specify the Hadoop version by adding the
 "hadoop-client" artifact to your project's dependencies. For example, if you're
 using Hadoop 1.2.1 and build your application using SBT, add this entry to


[23/50] git commit: Merge pull request #234 from alig/master

Posted by pw...@apache.org.
Merge pull request #234 from alig/master

Updated documentation about the YARN v2.2 build process


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/241336ad
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/241336ad
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/241336ad

Branch: refs/heads/scala-2.10
Commit: 241336add5be07fca5ff6c17eed368df7d0c3e3c
Parents: e039234 e2c2914
Author: Patrick Wendell <pw...@gmail.com>
Authored: Fri Dec 6 17:29:03 2013 -0800
Committer: Patrick Wendell <pw...@gmail.com>
Committed: Fri Dec 6 17:29:03 2013 -0800

----------------------------------------------------------------------
 docs/building-with-maven.md | 4 ++++
 docs/cluster-overview.md    | 2 +-
 docs/index.md               | 6 ++++--
 docs/running-on-yarn.md     | 8 ++++++++
 4 files changed, 17 insertions(+), 3 deletions(-)
----------------------------------------------------------------------



[37/50] git commit: Small fix

Posted by pw...@apache.org.
Small fix


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/0428145e
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/0428145e
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/0428145e

Branch: refs/heads/scala-2.10
Commit: 0428145ed419c63d689078387d80584c7105b0b7
Parents: b3e87c0
Author: Patrick Wendell <pw...@gmail.com>
Authored: Sat Dec 7 22:32:41 2013 -0800
Committer: Patrick Wendell <pw...@gmail.com>
Committed: Sat Dec 7 22:33:11 2013 -0800

----------------------------------------------------------------------
 docs/hadoop-third-party-distributions.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/0428145e/docs/hadoop-third-party-distributions.md
----------------------------------------------------------------------
diff --git a/docs/hadoop-third-party-distributions.md b/docs/hadoop-third-party-distributions.md
index 268944f..de6a2b0 100644
--- a/docs/hadoop-third-party-distributions.md
+++ b/docs/hadoop-third-party-distributions.md
@@ -39,7 +39,7 @@ the _exact_ Hadoop version you are running to avoid any compatibility errors.
         <tr><td>HDP 1.3</td><td>1.2.0</td></tr>
         <tr><td>HDP 1.2</td><td>1.1.2</td></tr>
         <tr><td>HDP 1.1</td><td>1.0.3</td></tr>
-	<tr><td>HDP 1.0</td><td>1.0.3</td></tr>
+        <tr><td>HDP 1.0</td><td>1.0.3</td></tr>
         <tr><td>HDP 2.0</td><td>2.2.0</td></tr>
       </table>
     </td>


[09/50] git commit: jobWaiter.synchronized before jobWaiter.wait

Posted by pw...@apache.org.
jobWaiter.synchronized before jobWaiter.wait


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/aebb123f
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/aebb123f
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/aebb123f

Branch: refs/heads/scala-2.10
Commit: aebb123fd3b4bf0d57d867f33ca0325340ee42e4
Parents: 5d46025
Author: Mark Hamstra <ma...@gmail.com>
Authored: Thu Dec 5 17:16:44 2013 -0800
Committer: Mark Hamstra <ma...@gmail.com>
Committed: Thu Dec 5 17:16:44 2013 -0800

----------------------------------------------------------------------
 core/src/main/scala/org/apache/spark/FutureAction.scala        | 2 +-
 core/src/main/scala/org/apache/spark/scheduler/JobWaiter.scala | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/aebb123f/core/src/main/scala/org/apache/spark/FutureAction.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/FutureAction.scala b/core/src/main/scala/org/apache/spark/FutureAction.scala
index 1ad9240..c6b4ac5 100644
--- a/core/src/main/scala/org/apache/spark/FutureAction.scala
+++ b/core/src/main/scala/org/apache/spark/FutureAction.scala
@@ -99,7 +99,7 @@ class SimpleFutureAction[T] private[spark](jobWaiter: JobWaiter[_], resultFunc:
   override def ready(atMost: Duration)(implicit permit: CanAwait): SimpleFutureAction.this.type = {
     if (!atMost.isFinite()) {
       awaitResult()
-    } else {
+    } else jobWaiter.synchronized {
       val finishTime = System.currentTimeMillis() + atMost.toMillis
       while (!isCompleted) {
         val time = System.currentTimeMillis()

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/aebb123f/core/src/main/scala/org/apache/spark/scheduler/JobWaiter.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/scheduler/JobWaiter.scala b/core/src/main/scala/org/apache/spark/scheduler/JobWaiter.scala
index 58f238d..b026f86 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/JobWaiter.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/JobWaiter.scala
@@ -31,6 +31,7 @@ private[spark] class JobWaiter[T](
   private var finishedTasks = 0
 
   // Is the job as a whole finished (succeeded or failed)?
+  @volatile
   private var _jobFinished = totalTasks == 0
 
   def jobFinished = _jobFinished
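
The rule being honored here: Object.wait() may only be called by a thread that holds the
object's monitor, otherwise the JVM throws IllegalMonitorStateException; the @volatile on
_jobFinished additionally makes the flag safe to read outside the monitor. A minimal
sketch of the required shape (illustrative, not the actual FutureAction code):

    // Illustrative wait/notify shape: wait() must run inside synchronized
    // on the same object, or the JVM throws IllegalMonitorStateException.
    val lock = new Object
    var done = false                // hypothetical completion flag, guarded by lock

    def awaitDone() = lock.synchronized {
      while (!done) lock.wait()     // atomically releases the monitor while waiting
    }

    def markDone() = lock.synchronized {
      done = true
      lock.notifyAll()              // wake every waiter so each can re-check the flag
    }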


[27/50] git commit: Merge pull request #236 from pwendell/shuffle-docs

Posted by pw...@apache.org.
Merge pull request #236 from pwendell/shuffle-docs

Adding disclaimer for shuffle file consolidation


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/1b38f5f2
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/1b38f5f2
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/1b38f5f2

Branch: refs/heads/scala-2.10
Commit: 1b38f5f2774982d524742e987b6cef26ccaae676
Parents: e5d5728 b9451ac
Author: Patrick Wendell <pw...@gmail.com>
Authored: Fri Dec 6 20:16:15 2013 -0800
Committer: Patrick Wendell <pw...@gmail.com>
Committed: Fri Dec 6 20:16:15 2013 -0800

----------------------------------------------------------------------
 docs/configuration.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------



[18/50] git commit: Merge pull request #205 from kayousterhout/logging

Posted by pw...@apache.org.
Merge pull request #205 from kayousterhout/logging

Added logging of scheduler delays to UI

This commit adds two metrics to the UI:

1) The time to get task results, if they're fetched remotely

2) The scheduler delay.  When the scheduler starts getting overwhelmed (because it can't keep up with the rate at which tasks are being submitted), tasks get delayed on the tail end: the completion message from the worker sits in a long queue and takes a while to be processed by the scheduler.  This commit records that delay in the UI so that users can tell when the scheduler is becoming the bottleneck.
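
A rough sketch of how the second metric can be derived, with hypothetical names and
values (the real StagePage computes it from TaskInfo and TaskMetrics):

    // Rough sketch, hypothetical values: scheduler delay is the wall-clock time a
    // task spent neither running on the executor nor fetching its result.
    val launchTime        = 0L     // ms: scheduler launched the task
    val finishTime        = 1200L  // ms: scheduler processed the completion message
    val executorRunTime   = 1000L  // ms: time actually spent running on the executor
    val gettingResultTime = 50L    // ms: time spent fetching a remote result
    val schedulerDelay    = (finishTime - launchTime) - executorRunTime - gettingResultTime
    // Here 150 ms was spent in scheduler queues on either end of the task.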


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/3fb302c0
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/3fb302c0
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/3fb302c0

Branch: refs/heads/scala-2.10
Commit: 3fb302c08d078decd1fa7dd0fc008faff132ab7f
Parents: 87676a6 58b3aff
Author: Matei Zaharia <ma...@eecs.berkeley.edu>
Authored: Fri Dec 6 11:03:32 2013 -0800
Committer: Matei Zaharia <ma...@eecs.berkeley.edu>
Committed: Fri Dec 6 11:03:32 2013 -0800

----------------------------------------------------------------------
 .../org/apache/spark/ui/jobs/StagePage.scala    | 36 ++++++++++++++++++--
 1 file changed, 34 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/3fb302c0/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala
----------------------------------------------------------------------


[47/50] git commit: Merge branch 'master' into akka-bug-fix

Posted by pw...@apache.org.
Merge branch 'master' into akka-bug-fix

Conflicts:
	core/pom.xml
	core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala
	pom.xml
	project/SparkBuild.scala
	streaming/pom.xml
	yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocationHandler.scala


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/603af51b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/603af51b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/603af51b

Branch: refs/heads/scala-2.10
Commit: 603af51bb5257744ce0db28e7f10db6a2ba899ec
Parents: 17db6a9 d2efe13
Author: Prashant Sharma <pr...@imaginea.com>
Authored: Wed Dec 11 10:21:53 2013 +0530
Committer: Prashant Sharma <pr...@imaginea.com>
Committed: Wed Dec 11 10:21:53 2013 +0530

----------------------------------------------------------------------
 README.md                                       |   8 +-
 core/pom.xml                                    |  12 +-
 .../scala/org/apache/spark/FutureAction.scala   |   2 +-
 .../org/apache/spark/MapOutputTracker.scala     |   8 +-
 .../scala/org/apache/spark/SparkContext.scala   | 236 +++----
 .../main/scala/org/apache/spark/rdd/RDD.scala   |  10 +-
 .../apache/spark/rdd/ZippedPartitionsRDD.scala  |  27 +-
 .../apache/spark/scheduler/DAGScheduler.scala   | 382 ++++++++---
 .../spark/scheduler/DAGSchedulerEvent.scala     |   5 +-
 .../org/apache/spark/scheduler/JobWaiter.scala  |   1 +
 .../apache/spark/scheduler/SparkListener.scala  |   2 +-
 .../scheduler/cluster/ClusterScheduler.scala    |   8 +-
 .../cluster/ClusterTaskSetManager.scala         |   2 +-
 .../spark/scheduler/local/LocalScheduler.scala  |  29 +-
 .../spark/storage/ShuffleBlockManager.scala     |   2 +-
 .../org/apache/spark/storage/StorageLevel.scala |   2 +-
 .../spark/storage/StoragePerfTester.scala       |  17 +
 .../org/apache/spark/ui/jobs/StagePage.scala    |  65 +-
 .../org/apache/spark/JobCancellationSuite.scala |   4 +-
 .../SparkContextSchedulerCreationSuite.scala    | 140 ++++
 .../deploy/worker/ExecutorRunnerTest.scala      |  17 +
 .../apache/spark/rdd/AsyncRDDActionsSuite.scala |  26 +
 .../spark/scheduler/DAGSchedulerSuite.scala     |  43 +-
 .../spark/storage/DiskBlockManagerSuite.scala   |  31 +-
 .../util/collection/OpenHashMapSuite.scala      |  17 +
 .../util/collection/OpenHashSetSuite.scala      |  17 +
 .../PrimitiveKeyOpenHashMapSuite.scala          |  17 +
 docs/_layouts/global.html                       |   8 +-
 docs/bagel-programming-guide.md                 |   2 +-
 docs/building-with-maven.md                     |   6 +
 docs/cluster-overview.md                        |   2 +-
 docs/configuration.md                           |  36 +-
 docs/hadoop-third-party-distributions.md        |   3 +-
 docs/index.md                                   |   8 +-
 docs/job-scheduling.md                          |   2 +-
 docs/running-on-yarn.md                         |   8 +
 docs/spark-standalone.md                        |   4 +-
 docs/streaming-programming-guide.md             |   8 +-
 new-yarn/pom.xml                                | 161 +++++
 .../spark/deploy/yarn/ApplicationMaster.scala   | 446 ++++++++++++
 .../yarn/ApplicationMasterArguments.scala       |  94 +++
 .../org/apache/spark/deploy/yarn/Client.scala   | 519 ++++++++++++++
 .../spark/deploy/yarn/ClientArguments.scala     | 148 ++++
 .../yarn/ClientDistributedCacheManager.scala    | 228 ++++++
 .../spark/deploy/yarn/WorkerLauncher.scala      | 223 ++++++
 .../spark/deploy/yarn/WorkerRunnable.scala      | 209 ++++++
 .../deploy/yarn/YarnAllocationHandler.scala     | 687 +++++++++++++++++++
 .../spark/deploy/yarn/YarnSparkHadoopUtil.scala |  43 ++
 .../cluster/YarnClientClusterScheduler.scala    |  47 ++
 .../cluster/YarnClientSchedulerBackend.scala    | 109 +++
 .../cluster/YarnClusterScheduler.scala          |  55 ++
 .../ClientDistributedCacheManagerSuite.scala    | 220 ++++++
 pom.xml                                         |  61 +-
 project/SparkBuild.scala                        |  30 +-
 python/pyspark/rdd.py                           |   5 +-
 python/pyspark/tests.py                         |  15 +
 python/test_support/userlibrary.py              |  17 +
 repl-bin/src/deb/bin/spark-executor             |   2 +-
 repl-bin/src/deb/bin/spark-shell                |   2 +-
 streaming/pom.xml                               |   9 +-
 .../spark/deploy/yarn/ApplicationMaster.scala   | 172 ++---
 .../org/apache/spark/deploy/yarn/Client.scala   | 151 ++--
 .../spark/deploy/yarn/WorkerRunnable.scala      |  85 ++-
 .../deploy/yarn/YarnAllocationHandler.scala     | 346 ++++++----
 64 files changed, 4656 insertions(+), 645 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/603af51b/README.md
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/603af51b/core/pom.xml
----------------------------------------------------------------------
diff --cc core/pom.xml
index 71bf15c,38f4be1..3fe48fd
--- a/core/pom.xml
+++ b/core/pom.xml
@@@ -95,12 -95,20 +95,16 @@@
        <version>0.3.1</version>
      </dependency>
      <dependency>
-       <groupId>com.typesafe.akka</groupId>
+       <groupId>${akka.group}</groupId>
 -      <artifactId>akka-actor</artifactId>
++      <artifactId>akka-actor_2.10</artifactId>
+     </dependency>
+     <dependency>
+       <groupId>${akka.group}</groupId>
 -      <artifactId>akka-remote</artifactId>
 +      <artifactId>akka-remote_2.10</artifactId>
      </dependency>
      <dependency>
-       <groupId>com.typesafe.akka</groupId>
+       <groupId>${akka.group}</groupId>
 -      <artifactId>akka-slf4j</artifactId>
 -    </dependency>
 -    <dependency>
 -      <groupId>org.scala-lang</groupId>
 -      <artifactId>scalap</artifactId>
 +      <artifactId>akka-slf4j_2.10</artifactId>
      </dependency>
      <dependency>
        <groupId>org.scala-lang</groupId>

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/603af51b/core/src/main/scala/org/apache/spark/MapOutputTracker.scala
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/603af51b/core/src/main/scala/org/apache/spark/SparkContext.scala
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/603af51b/core/src/main/scala/org/apache/spark/rdd/RDD.scala
----------------------------------------------------------------------
diff --cc core/src/main/scala/org/apache/spark/rdd/RDD.scala
index f80d3d6,893708f..ea45566
--- a/core/src/main/scala/org/apache/spark/rdd/RDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/RDD.scala
@@@ -938,9 -940,9 +938,9 @@@ abstract class RDD[T: ClassTag]
    private var storageLevel: StorageLevel = StorageLevel.NONE
  
    /** Record user function generating this RDD. */
-   private[spark] val origin = Utils.formatSparkCallSite
+   @transient private[spark] val origin = Utils.formatSparkCallSite
  
 -  private[spark] def elementClassManifest: ClassManifest[T] = classManifest[T]
 +  private[spark] def elementClassTag: ClassTag[T] = classTag[T]
  
    private[spark] var checkpointData: Option[RDDCheckpointData[T]] = None
  

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/603af51b/core/src/main/scala/org/apache/spark/rdd/ZippedPartitionsRDD.scala
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/603af51b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala
----------------------------------------------------------------------
diff --cc core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala
index 201572d,f9cd021..963d15b
--- a/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala
@@@ -180,6 -158,56 +160,57 @@@ class DAGScheduler
  
    val metadataCleaner = new MetadataCleaner(MetadataCleanerType.DAG_SCHEDULER, this.cleanup)
  
+   /**
+    * Starts the event processing actor.  The actor has two responsibilities:
+    *
+    * 1. Waits for events like job submission, task finished, task failure etc., and calls
+    *    [[org.apache.spark.scheduler.DAGScheduler.processEvent()]] to process them.
+    * 2. Schedules a periodical task to resubmit failed stages.
+    *
+    * NOTE: the actor cannot be started in the constructor, because the periodical task references
+    * some internal states of the enclosing [[org.apache.spark.scheduler.DAGScheduler]] object, thus
+    * cannot be scheduled until the [[org.apache.spark.scheduler.DAGScheduler]] is fully constructed.
+    */
+   def start() {
+     eventProcessActor = env.actorSystem.actorOf(Props(new Actor {
+       /**
+        * A handle to the periodical task, used to cancel the task when the actor is stopped.
+        */
+       var resubmissionTask: Cancellable = _
+ 
+       override def preStart() {
++        import context.dispatcher
+         /**
+          * A message is sent to the actor itself periodically to remind the actor to resubmit failed
+          * stages.  In this way, stage resubmission can be done within the same thread context of
+          * other event processing logic to avoid unnecessary synchronization overhead.
+          */
+         resubmissionTask = context.system.scheduler.schedule(
 -          RESUBMIT_TIMEOUT.millis, RESUBMIT_TIMEOUT.millis, self, ResubmitFailedStages)
++          RESUBMIT_TIMEOUT, RESUBMIT_TIMEOUT, self, ResubmitFailedStages)
+       }
+ 
+       /**
+        * The main event loop of the DAG scheduler.
+        */
+       def receive = {
+         case event: DAGSchedulerEvent =>
+           logDebug("Got event of type " + event.getClass.getName)
+ 
+           /**
+            * All events are forwarded to `processEvent()`, so that the event processing logic can
+            * easily tested without starting a dedicated actor.  Please refer to `DAGSchedulerSuite`
+            * for details.
+            */
+           if (!processEvent(event)) {
+             submitWaitingStages()
+           } else {
+             resubmissionTask.cancel()
+             context.stop(self)
+           }
+       }
+     }))
+   }
+ 
    def addSparkListener(listener: SparkListener) {
      listenerBus.addListener(listener)
    }
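
The merged hunk above starts the event-processing actor with a periodic self-message that
triggers failed-stage resubmission. A self-contained sketch of that Akka 2.2-era pattern
(hypothetical interval; import context.dispatcher supplies the implicit execution context
for schedule(), exactly as in the diff):

    import akka.actor.{Actor, ActorSystem, Cancellable, Props}
    import scala.concurrent.duration._

    case object ResubmitFailedStages

    class EventLoop extends Actor {
      import context.dispatcher          // implicit ExecutionContext for schedule()
      var resubmissionTask: Cancellable = _

      override def preStart() {
        // Remind ourselves periodically; handling the reminder on the actor's own
        // mailbox avoids synchronizing with the rest of the event-processing logic.
        resubmissionTask = context.system.scheduler.schedule(
          50.millis, 50.millis, self, ResubmitFailedStages)
      }

      def receive = {
        case ResubmitFailedStages => // resubmit failed stages here
      }

      override def postStop() { resubmissionTask.cancel() }
    }

    // Usage: ActorSystem("demo").actorOf(Props(new EventLoop))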

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/603af51b/core/src/main/scala/org/apache/spark/scheduler/cluster/ClusterScheduler.scala
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/603af51b/docs/configuration.md
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/603af51b/pom.xml
----------------------------------------------------------------------
diff --cc pom.xml
index 979fd0c,9348c77..6906ad2
--- a/pom.xml
+++ b/pom.xml
@@@ -99,11 -99,12 +99,13 @@@
      <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
      <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
  
 -    <java.version>1.5</java.version>
 -    <scala.version>2.9.3</scala.version>
 +    <java.version>1.6</java.version>
 +
 +    <scala.version>2.10.3</scala.version>
      <mesos.version>0.13.0</mesos.version>
 +    <akka.version>2.2.3</akka.version>
+     <akka.group>com.typesafe.akka</akka.group>
 -    <akka.version>2.0.5</akka.version>
+     <protobuf.version>2.4.1</protobuf.version>
      <slf4j.version>1.7.2</slf4j.version>
      <log4j.version>1.2.17</log4j.version>
      <hadoop.version>1.0.4</hadoop.version>
@@@ -213,8 -260,8 +215,8 @@@
          <version>0.3.1</version>
        </dependency>
        <dependency>
-         <groupId>com.typesafe.akka</groupId>
+         <groupId>${akka.group}</groupId>
 -        <artifactId>akka-actor</artifactId>
 +        <artifactId>akka-actor_2.10</artifactId>
          <version>${akka.version}</version>
          <exclusions>
            <exclusion>
@@@ -224,8 -271,8 +226,8 @@@
          </exclusions>
        </dependency>
        <dependency>
-         <groupId>com.typesafe.akka</groupId>
+         <groupId>${akka.group}</groupId>
 -        <artifactId>akka-remote</artifactId>
 +        <artifactId>akka-remote_2.10</artifactId>
          <version>${akka.version}</version>
          <exclusions>
            <exclusion>
@@@ -235,8 -282,8 +237,8 @@@
          </exclusions>
        </dependency>
        <dependency>
-         <groupId>com.typesafe.akka</groupId>
+         <groupId>${akka.group}</groupId>
 -        <artifactId>akka-slf4j</artifactId>
 +        <artifactId>akka-slf4j_2.10</artifactId>
          <version>${akka.version}</version>
          <exclusions>
            <exclusion>
@@@ -246,6 -293,17 +248,17 @@@
          </exclusions>
        </dependency>
        <dependency>
+         <groupId>${akka.group}</groupId>
 -        <artifactId>akka-zeromq</artifactId>
++        <artifactId>akka-zeromq_2.10</artifactId>
+         <version>${akka.version}</version>
+         <exclusions>
+           <exclusion>
+             <groupId>org.jboss.netty</groupId>
+             <artifactId>netty</artifactId>
+           </exclusion>
+         </exclusions>
+       </dependency>
+       <dependency>
          <groupId>it.unimi.dsi</groupId>
          <artifactId>fastutil</artifactId>
          <version>6.4.4</version>

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/603af51b/project/SparkBuild.scala
----------------------------------------------------------------------
diff --cc project/SparkBuild.scala
index 3584e88,ac87cff..ea7bf96
--- a/project/SparkBuild.scala
+++ b/project/SparkBuild.scala
@@@ -200,36 -218,38 +216,36 @@@ object SparkBuild extends Build 
      ),
  
      libraryDependencies ++= Seq(
 -      "com.google.guava" % "guava" % "14.0.1",
 -      "com.google.code.findbugs" % "jsr305" % "1.3.9",
 -      "log4j" % "log4j" % "1.2.17",
 -      "org.slf4j" % "slf4j-api" % slf4jVersion,
 -      "org.slf4j" % "slf4j-log4j12" % slf4jVersion,
 -      "commons-daemon" % "commons-daemon" % "1.0.10",  // workaround for bug HADOOP-9407
 -      "com.ning" % "compress-lzf" % "0.8.4",
 -      "org.xerial.snappy" % "snappy-java" % "1.0.5",
 -      "org.ow2.asm" % "asm" % "4.0",
 -      "com.google.protobuf" % "protobuf-java" % protobufVersion,
 -      akkaGroup % "akka-actor" % akkaVersion excludeAll(excludeNetty),
 -      akkaGroup % "akka-remote" % akkaVersion excludeAll(excludeNetty),
 -      akkaGroup % "akka-slf4j" % akkaVersion excludeAll(excludeNetty),
 -      "it.unimi.dsi" % "fastutil" % "6.4.4",
 -      "colt" % "colt" % "1.2.0",
 -      "net.liftweb" % "lift-json_2.9.2" % "2.5",
 -      "org.apache.mesos" % "mesos" % "0.13.0",
 -      "io.netty" % "netty-all" % "4.0.0.Beta2",
 -      "org.apache.derby" % "derby" % "10.4.2.0" % "test",
 -      "org.apache.hadoop" % "hadoop-client" % hadoopVersion excludeAll(excludeJackson, excludeNetty, excludeAsm, excludeCglib),
 -      "net.java.dev.jets3t" % "jets3t" % "0.7.1",
 -      "org.apache.avro" % "avro" % "1.7.4",
 -      "org.apache.avro" % "avro-ipc" % "1.7.4" excludeAll(excludeNetty),
 -      "org.apache.zookeeper" % "zookeeper" % "3.4.5" excludeAll(excludeNetty),
 -      "com.codahale.metrics" % "metrics-core" % "3.0.0",
 -      "com.codahale.metrics" % "metrics-jvm" % "3.0.0",
 -      "com.codahale.metrics" % "metrics-json" % "3.0.0",
 -      "com.codahale.metrics" % "metrics-ganglia" % "3.0.0",
 -      "com.codahale.metrics" % "metrics-graphite" % "3.0.0",
 -      "com.twitter" % "chill_2.9.3" % "0.3.1",
 -      "com.twitter" % "chill-java" % "0.3.1"
 -    )
 +        "com.google.guava"         % "guava"            % "14.0.1",
 +        "com.google.code.findbugs" % "jsr305"           % "1.3.9",
 +        "log4j"                    % "log4j"            % "1.2.17",
 +        "org.slf4j"                % "slf4j-api"        % slf4jVersion,
 +        "org.slf4j"                % "slf4j-log4j12"    % slf4jVersion,
 +        "commons-daemon"           % "commons-daemon"   % "1.0.10", // workaround for bug HADOOP-9407
 +        "com.ning"                 % "compress-lzf"     % "0.8.4",
 +        "org.xerial.snappy"        % "snappy-java"      % "1.0.5",
 +        "org.ow2.asm"              % "asm"              % "4.0",
 +        "com.google.protobuf"      % "protobuf-java"    % "2.4.1",
-         "com.typesafe.akka"       %% "akka-remote"      % "2.2.3"  excludeAll(excludeNetty), 
-         "com.typesafe.akka"       %% "akka-slf4j"       % "2.2.3"  excludeAll(excludeNetty),
++        akkaGroup                 %% "akka-remote"      % "2.2.3"  excludeAll(excludeNetty),
++        akkaGroup                 %% "akka-slf4j"       % "2.2.3"  excludeAll(excludeNetty),
 +        "net.liftweb"             %% "lift-json"        % "2.5.1"  excludeAll(excludeNetty),
 +        "it.unimi.dsi"             % "fastutil"         % "6.4.4",
 +        "colt"                     % "colt"             % "1.2.0",
 +        "org.apache.mesos"         % "mesos"            % "0.13.0",
 +        "net.java.dev.jets3t"      % "jets3t"           % "0.7.1",
 +        "org.apache.derby"         % "derby"            % "10.4.2.0"                     % "test",
 +        "org.apache.hadoop"        % "hadoop-client"    % hadoopVersion excludeAll(excludeJackson, excludeNetty, excludeAsm, excludeCglib),
 +        "org.apache.avro"          % "avro"             % "1.7.4",
 +        "org.apache.avro"          % "avro-ipc"         % "1.7.4" excludeAll(excludeNetty),
 +        "org.apache.zookeeper"     % "zookeeper"        % "3.4.5" excludeAll(excludeNetty),
 +        "com.codahale.metrics"     % "metrics-core"     % "3.0.0",
 +        "com.codahale.metrics"     % "metrics-jvm"      % "3.0.0",
 +        "com.codahale.metrics"     % "metrics-json"     % "3.0.0",
 +        "com.codahale.metrics"     % "metrics-ganglia"  % "3.0.0",
 +        "com.codahale.metrics"     % "metrics-graphite" % "3.0.0",
 +        "com.twitter"             %% "chill"            % "0.3.1",
 +        "com.twitter"              % "chill-java"       % "0.3.1"
 +      )
    )
  
    def rootSettings = sharedSettings ++ Seq(
@@@ -291,11 -311,6 +307,11 @@@
          exclude("com.sun.jdmk", "jmxtools")
          exclude("com.sun.jmx", "jmxri")
          exclude("net.sf.jopt-simple", "jopt-simple")
 +        excludeAll(excludeNetty),
 +      "org.eclipse.paho"      % "mqtt-client"      % "0.4.0",
 +      "com.github.sgroschupf" % "zkclient"         % "0.1"                excludeAll(excludeNetty),
 +      "org.twitter4j"         % "twitter4j-stream" % "3.0.3"              excludeAll(excludeNetty),
-       "com.typesafe.akka"    %%  "akka-zeromq"     % "2.2.3"              excludeAll(excludeNetty)
++      akkaGroup              %% "akka-zeromq"      % "2.2.3"              excludeAll(excludeNetty)
      )
    )
  

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/603af51b/python/pyspark/rdd.py
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/603af51b/streaming/pom.xml
----------------------------------------------------------------------
diff --cc streaming/pom.xml
index 298bc83,4089293..e27b437
--- a/streaming/pom.xml
+++ b/streaming/pom.xml
@@@ -110,15 -110,8 +110,8 @@@
        <artifactId>scala-library</artifactId>
      </dependency>
      <dependency>
-       <groupId>com.typesafe.akka</groupId>
+       <groupId>${akka.group}</groupId>
 -      <artifactId>akka-zeromq</artifactId>
 +      <artifactId>akka-zeromq_2.10</artifactId>
-       <version>${akka.version}</version>
-       <exclusions>
-         <exclusion>
-           <groupId>org.jboss.netty</groupId>
-           <artifactId>netty</artifactId>
-         </exclusion>
-       </exclusions>
      </dependency>
      <dependency>
        <groupId>org.scalatest</groupId>

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/603af51b/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocationHandler.scala
----------------------------------------------------------------------
diff --cc yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocationHandler.scala
index a6ce1b6,f15f3c7..9ab2073
--- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocationHandler.scala
+++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocationHandler.scala
@@@ -17,24 -17,30 +17,30 @@@
  
  package org.apache.spark.deploy.yarn
  
+ import java.lang.{Boolean => JBoolean}
+ import java.util.{Collections, Set => JSet}
+ import java.util.concurrent.{CopyOnWriteArrayList, ConcurrentHashMap}
+ import java.util.concurrent.atomic.AtomicInteger
+ 
+ import scala.collection
+ import scala.collection.JavaConversions._
+ import scala.collection.mutable.{ArrayBuffer, HashMap, HashSet}
+ 
  import org.apache.spark.Logging
- import org.apache.spark.util.Utils
  import org.apache.spark.scheduler.SplitInfo
- import scala.collection
- import org.apache.hadoop.yarn.api.records.{AMResponse, ApplicationAttemptId, ContainerId, Priority, Resource, ResourceRequest, ContainerStatus, Container}
  import org.apache.spark.scheduler.cluster.{ClusterScheduler, CoarseGrainedSchedulerBackend}
+ import org.apache.spark.util.Utils
+ 
+ import org.apache.hadoop.conf.Configuration
+ import org.apache.hadoop.yarn.api.AMRMProtocol
+ import org.apache.hadoop.yarn.api.records.{AMResponse, ApplicationAttemptId}
+ import org.apache.hadoop.yarn.api.records.{Container, ContainerId, ContainerStatus}
+ import org.apache.hadoop.yarn.api.records.{Priority, Resource, ResourceRequest}
  import org.apache.hadoop.yarn.api.protocolrecords.{AllocateRequest, AllocateResponse}
  import org.apache.hadoop.yarn.util.{RackResolver, Records}
- import java.util.concurrent.{CopyOnWriteArrayList, ConcurrentHashMap}
- import java.util.concurrent.atomic.AtomicInteger
- import org.apache.hadoop.yarn.api.AMRMProtocol
- import collection.JavaConversions._
- import collection.mutable.{ArrayBuffer, HashMap, HashSet}
- import org.apache.hadoop.conf.Configuration
- import java.util.{Collections, Set => JSet}
- import java.lang.{Boolean => JBoolean}
+ 
  
 -object AllocationType extends Enumeration ("HOST", "RACK", "ANY") {
 +object AllocationType extends Enumeration {
    type AllocationType = Value
    val HOST, RACK, ANY = Value
  }
@@@ -209,9 -235,10 +235,10 @@@ private[yarn] class YarnAllocationHandl
            numWorkersRunning.decrementAndGet()
          }
          else {
-           // deallocate + allocate can result in reusing id's wrongly - so use a different counter (workerIdCounter)
+           // Deallocate + allocate can result in reusing id's wrongly - so use a different counter
+           // (workerIdCounter)
            val workerId = workerIdCounter.incrementAndGet().toString
 -          val driverUrl = "akka://spark@%s:%s/user/%s".format(
 +          val driverUrl = "akka.tcp://spark@%s:%s/user/%s".format(
              System.getProperty("spark.driver.host"), System.getProperty("spark.driver.port"),
              CoarseGrainedSchedulerBackend.ACTOR_NAME)
  


[15/50] git commit: Rename SparkActorSystem to IndestructibleActorSystem

Posted by pw...@apache.org.
Rename SparkActorSystem to IndestructibleActorSystem


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/5a864e3f
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/5a864e3f
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/5a864e3f

Branch: refs/heads/scala-2.10
Commit: 5a864e3fce234d19e1b371d9bab40554293546bb
Parents: f6c8c1c c9cd2af
Author: Aaron Davidson <aa...@databricks.com>
Authored: Fri Dec 6 00:16:40 2013 -0800
Committer: Aaron Davidson <aa...@databricks.com>
Committed: Fri Dec 6 00:21:43 2013 -0800

----------------------------------------------------------------------
 .../executor/CoarseGrainedExecutorBackend.scala |  3 +-
 .../cluster/SimrSchedulerBackend.scala          |  2 +-
 .../spark/storage/BlockObjectWriter.scala       |  2 +-
 .../scala/org/apache/spark/util/AkkaUtils.scala | 15 ++++--
 .../spark/util/IndestructibleActorSystem.scala  | 55 +++++++++++++++++++
 .../apache/spark/util/SparkActorSystem.scala    | 56 --------------------
 .../spark/deploy/yarn/WorkerLauncher.scala      |  2 +-
 7 files changed, 72 insertions(+), 63 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/5a864e3f/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala
----------------------------------------------------------------------
diff --cc core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala
index dcb12be,406e015..debbdd4
--- a/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala
+++ b/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala
@@@ -97,7 -97,8 +97,8 @@@ private[spark] object CoarseGrainedExec
  
      // Create a new ActorSystem to run the backend, because we can't create a SparkEnv / Executor
      // before getting started with all our system properties, etc
-     val (actorSystem, boundPort) = AkkaUtils.createActorSystem("sparkExecutor", hostname, 0)
+     val (actorSystem, boundPort) = AkkaUtils.createActorSystem("sparkExecutor", hostname, 0,
 -      useSparkAS = true)
++      indestructible = true)
      // set it
      val sparkHostPort = hostname + ":" + boundPort
      System.setProperty("spark.hostPort", sparkHostPort)

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/5a864e3f/core/src/main/scala/org/apache/spark/storage/BlockObjectWriter.scala
----------------------------------------------------------------------
diff --cc core/src/main/scala/org/apache/spark/storage/BlockObjectWriter.scala
index b4451fc,b4451fc..df33f6b
--- a/core/src/main/scala/org/apache/spark/storage/BlockObjectWriter.scala
+++ b/core/src/main/scala/org/apache/spark/storage/BlockObjectWriter.scala
@@@ -44,7 -44,7 +44,7 @@@ abstract class BlockObjectWriter(val bl
     * Flush the partial writes and commit them as a single atomic block. Return the
     * number of bytes written for this commit.
     */
--  def commit(): Long
++  def commit(): Long
  
    /**
     * Reverts writes that haven't been flushed yet. Callers should invoke this function

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/5a864e3f/core/src/main/scala/org/apache/spark/util/AkkaUtils.scala
----------------------------------------------------------------------
diff --cc core/src/main/scala/org/apache/spark/util/AkkaUtils.scala
index 407e9ff,f3e2644..9f3f163
--- a/core/src/main/scala/org/apache/spark/util/AkkaUtils.scala
+++ b/core/src/main/scala/org/apache/spark/util/AkkaUtils.scala
@@@ -17,7 -17,7 +17,7 @@@
  
  package org.apache.spark.util
  
--import akka.actor.{SparkActorSystem, ActorSystem, ExtendedActorSystem}
++import akka.actor.{IndestructibleActorSystem, ActorSystem, ExtendedActorSystem}
  import com.typesafe.config.ConfigFactory
  import scala.concurrent.duration._
  import scala.concurrent.Await
@@@ -34,8 -34,10 +34,13 @@@ private[spark] object AkkaUtils 
     *
     * Note: the `name` parameter is important, as even if a client sends a message to right
     * host + port, if the system name is incorrect, Akka will drop the message.
++   *
++   * If indestructible is set to true, the Actor System will continue running in the event
++   * of a fatal exception. This is used by [[org.apache.spark.executor.Executor]].
     */
-   def createActorSystem(name: String, host: String, port: Int): (ActorSystem, Int) = {
 -  def createActorSystem(name: String, host: String, port: Int,
 -    useSparkAS: Boolean = false): (ActorSystem, Int) = {
++  def createActorSystem(name: String, host: String, port: Int, indestructible: Boolean = false)
++    : (ActorSystem, Int) = {
+ 
      val akkaThreads   = System.getProperty("spark.akka.threads", "4").toInt
      val akkaBatchSize = System.getProperty("spark.akka.batchSize", "15").toInt
  
@@@ -70,7 -72,12 +75,11 @@@
        |akka.remote.log-remote-lifecycle-events = $lifecycleEvents
        """.stripMargin)
  
-     val actorSystem = SparkActorSystem(name, akkaConf)
 -    val actorSystem = if (useSparkAS) {
 -      SparkActorSystem(name, akkaConf)
 -    }
 -    else {
++    val actorSystem = if (indestructible) {
++      IndestructibleActorSystem(name, akkaConf)
++    } else {
+       ActorSystem(name, akkaConf)
+     }
  
      val provider = actorSystem.asInstanceOf[ExtendedActorSystem].provider
      val boundPort = provider.getDefaultAddress.port.get
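
A usage sketch of the new flag (illustrative host/port; the caller must live
under the org.apache.spark package since AkkaUtils is private[spark]):

    import org.apache.spark.util.AkkaUtils

    // indestructible = true returns an ActorSystem that keeps running after a
    // fatal exception instead of shutting itself down.
    val (actorSystem, boundPort) =
      AkkaUtils.createActorSystem("sparkExecutor", "localhost", 0, indestructible = true)
    println("Actor system bound to port " + boundPort)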

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/5a864e3f/core/src/main/scala/org/apache/spark/util/IndestructibleActorSystem.scala
----------------------------------------------------------------------
diff --cc core/src/main/scala/org/apache/spark/util/IndestructibleActorSystem.scala
index 0000000,0000000..6951986
new file mode 100644
--- /dev/null
+++ b/core/src/main/scala/org/apache/spark/util/IndestructibleActorSystem.scala
@@@ -1,0 -1,0 +1,55 @@@
++/**
++ *  Copyright (C) 2009-2013 Typesafe Inc. <http://www.typesafe.com>
++ */
++
++// Must be in akka.actor package as ActorSystemImpl is protected[akka].
++package akka.actor
++
++import scala.util.control.{ControlThrowable, NonFatal}
++
++import com.typesafe.config.Config
++
++/**
++ * An [[akka.actor.ActorSystem]] which refuses to shut down in the event of a fatal exception.
++ * This is necessary as Spark Executors are allowed to recover from fatal exceptions
++ * (see [[org.apache.spark.executor.Executor]]).
++ */
++object IndestructibleActorSystem {
++  def apply(name: String, config: Config): ActorSystem =
++    apply(name, config, ActorSystem.findClassLoader())
++
++  def apply(name: String, config: Config, classLoader: ClassLoader): ActorSystem =
++    new IndestructibleActorSystemImpl(name, config, classLoader).start()
++}
++
++private[akka] class IndestructibleActorSystemImpl(
++    override val name: String,
++    applicationConfig: Config,
++    classLoader: ClassLoader)
++  extends ActorSystemImpl(name, applicationConfig, classLoader) {
++
++  protected override def uncaughtExceptionHandler: Thread.UncaughtExceptionHandler = {
++    val fallbackHandler = super.uncaughtExceptionHandler
++
++    new Thread.UncaughtExceptionHandler() {
++      def uncaughtException(thread: Thread, cause: Throwable): Unit = {
++        if (isFatalError(cause) && !settings.JvmExitOnFatalError) {
++          log.error(cause, "Uncaught fatal error from thread [{}] not shutting down " +
++            "ActorSystem [{}] tolerating and continuing.... ", thread.getName, name)
++          //shutdown()                 //TODO make it configurable
++        } else {
++          fallbackHandler.uncaughtException(thread, cause)
++        }
++      }
++    }
++  }
++
++  def isFatalError(e: Throwable): Boolean = {
++    e match {
++      case NonFatal(_) | _: InterruptedException | _: NotImplementedError | _: ControlThrowable =>
++        false
++      case _ =>
++        true
++    }
++  }
++}
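
A quick illustration of the isFatalError classification above (a sketch, not
part of the commit): NonFatal matches ordinary exceptions, so only truly fatal
throwables reach the "tolerating and continuing" branch.

    import scala.util.control.{ControlThrowable, NonFatal}

    def isFatalError(e: Throwable): Boolean = e match {
      case NonFatal(_) | _: InterruptedException | _: NotImplementedError | _: ControlThrowable =>
        false
      case _ => true
    }

    assert(!isFatalError(new RuntimeException("ordinary")))  // handled normally
    assert(isFatalError(new OutOfMemoryError()))             // tolerated, system stays up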

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/5a864e3f/core/src/main/scala/org/apache/spark/util/SparkActorSystem.scala
----------------------------------------------------------------------
diff --cc core/src/main/scala/org/apache/spark/util/SparkActorSystem.scala
index a679fd6,d329063..0000000
deleted file mode 100644,100644
--- a/core/src/main/scala/org/apache/spark/util/SparkActorSystem.scala
+++ /dev/null
@@@ -1,56 -1,56 +1,0 @@@
--/**
-- *  Copyright (C) 2009-2013 Typesafe Inc. <http://www.typesafe.com>
-- */
--
--// Must be in akka.actor package as ActorSystemImpl is protected[akka].
--package akka.actor
--
--import scala.util.control.{ControlThrowable, NonFatal}
--
--import com.typesafe.config.Config
--
--/**
-- * An ActorSystem specific to Spark. Based off of [[akka.actor.ActorSystem]].
-- * The only change from the default system is that we do not shut down the ActorSystem
-- * in the event of a fatal exception. This is necessary as Spark is allowed to recover
-- * from fatal exceptions (see [[org.apache.spark.executor.Executor]]).
-- */
--object SparkActorSystem {
--  def apply(name: String, config: Config): ActorSystem =
--    apply(name, config, ActorSystem.findClassLoader())
--
--  def apply(name: String, config: Config, classLoader: ClassLoader): ActorSystem =
--    new SparkActorSystemImpl(name, config, classLoader).start()
--}
--
--private[akka] class SparkActorSystemImpl(
--    override val name: String,
--    applicationConfig: Config,
--    classLoader: ClassLoader)
--  extends ActorSystemImpl(name, applicationConfig, classLoader) {
--
--  protected override def uncaughtExceptionHandler: Thread.UncaughtExceptionHandler = {
--    val fallbackHandler = super.uncaughtExceptionHandler
--
--    new Thread.UncaughtExceptionHandler() {
--      def uncaughtException(thread: Thread, cause: Throwable): Unit = {
--        if (isFatalError(cause) && !settings.JvmExitOnFatalError) {
--          log.error(cause, "Uncaught fatal error from thread [{}] not shutting down " +
-             "ActorSystem tolerating and continuing.... [{}]", thread.getName, name)
-           //shutdown()                 //TODO make it configurable
-         } else {
-           fallbackHandler.uncaughtException(thread, cause)
-         }
-       }
-     }
-   }
- 
-   def isFatalError(e: Throwable): Boolean = {
-     e match {
-       case NonFatal(_) | _: InterruptedException | _: NotImplementedError | _: ControlThrowable =>
-         false
-       case _ =>
-         true
-     }
-   }
- }
 -            "ActorSystem [{}] tolerating and continuing.... ", thread.getName, name)
 -          //shutdown()                 //TODO make it configurable
 -        } else {
 -          fallbackHandler.uncaughtException(thread, cause)
 -        }
 -      }
 -    }
 -  }
 -
 -  def isFatalError(e: Throwable): Boolean = {
 -    e match {
 -      case NonFatal(_) | _: InterruptedException | _: NotImplementedError | _: ControlThrowable =>
 -        false
 -      case _ =>
 -        true
 -    }
 -  }
 -}


[10/50] git commit: Change the name of input argument in ClusterScheduler#initialize from context to backend.

Posted by pw...@apache.org.
Change the name of input argument in ClusterScheduler#initialize from context to backend.

The SchedulerBackend used to be called ClusterSchedulerContext, so this makes a
small change to the input parameter name in ClusterScheduler#initialize to reflect that.


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/1cb259cb
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/1cb259cb
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/1cb259cb

Branch: refs/heads/scala-2.10
Commit: 1cb259cb577bfd3385cca6bb187d7fee18bd2c24
Parents: 5d46025
Author: Henry Saputra <he...@platfora.com>
Authored: Thu Dec 5 18:50:26 2013 -0800
Committer: Henry Saputra <he...@platfora.com>
Committed: Thu Dec 5 18:50:26 2013 -0800

----------------------------------------------------------------------
 .../org/apache/spark/scheduler/cluster/ClusterScheduler.scala    | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/1cb259cb/core/src/main/scala/org/apache/spark/scheduler/cluster/ClusterScheduler.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/ClusterScheduler.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/ClusterScheduler.scala
index c1e65a3..f475d00 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/cluster/ClusterScheduler.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/ClusterScheduler.scala
@@ -100,8 +100,8 @@ private[spark] class ClusterScheduler(val sc: SparkContext)
     this.dagScheduler = dagScheduler
   }
 
-  def initialize(context: SchedulerBackend) {
-    backend = context
+  def initialize(backend: SchedulerBackend) {
+    this.backend = backend
     // temporarily set rootPool name to empty
     rootPool = new Pool("", schedulingMode, 0, 0)
     schedulableBuilder = {
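
A tiny standalone illustration of the rename (hypothetical class names): when a
parameter shares a field's name, `this.` disambiguates the assignment instead
of the parameter silently shadowing the field.

    trait SchedulerBackend
    class Scheduler {
      private var backend: SchedulerBackend = null
      def initialize(backend: SchedulerBackend) {
        this.backend = backend  // field on the left, parameter on the right
      }
    }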


[34/50] git commit: Merge pull request #240 from pwendell/master

Posted by pw...@apache.org.
Merge pull request #240 from pwendell/master

SPARK-917 Improve API links in nav bar


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/6494d62f
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/6494d62f
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/6494d62f

Branch: refs/heads/scala-2.10
Commit: 6494d62fe40ac408b14de3f0f3de8ec896a0ae6e
Parents: f466f79 dd331a6
Author: Patrick Wendell <pw...@gmail.com>
Authored: Sat Dec 7 11:56:16 2013 -0800
Committer: Patrick Wendell <pw...@gmail.com>
Committed: Sat Dec 7 11:56:16 2013 -0800

----------------------------------------------------------------------
 docs/_layouts/global.html | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)
----------------------------------------------------------------------



[43/50] git commit: Style fixes and addressed review comments at #221

Posted by pw...@apache.org.
Style fixes and addressed review comments at #221


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/17db6a90
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/17db6a90
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/17db6a90

Branch: refs/heads/scala-2.10
Commit: 17db6a9041d5e83d7b6fe47f9c36758d0613fcd6
Parents: c1201f4
Author: Prashant Sharma <pr...@imaginea.com>
Authored: Tue Dec 10 11:34:10 2013 +0530
Committer: Prashant Sharma <pr...@imaginea.com>
Committed: Tue Dec 10 11:47:16 2013 +0530

----------------------------------------------------------------------
 assembly/pom.xml                                | 14 +++++++-------
 bagel/pom.xml                                   | 10 +++++-----
 core/pom.xml                                    | 18 +++++++++---------
 .../org/apache/spark/MapOutputTracker.scala     |  9 ++++-----
 .../scala/org/apache/spark/Partitioner.scala    |  6 +++---
 .../spark/deploy/worker/ui/WorkerWebUI.scala    | 14 +++++---------
 .../org/apache/spark/rdd/CheckpointRDD.scala    | 11 ++++-------
 .../scala/org/apache/spark/rdd/JdbcRDD.scala    |  1 +
 .../scala/org/apache/spark/rdd/MappedRDD.scala  |  3 ++-
 .../apache/spark/rdd/OrderedRDDFunctions.scala  |  3 ++-
 .../org/apache/spark/rdd/ShuffledRDD.scala      |  3 ++-
 .../spark/storage/BlockManagerMaster.scala      |  7 +++----
 .../spark/util/IndestructibleActorSystem.scala  | 17 +++++++++++++++--
 examples/pom.xml                                | 20 ++++++++++----------
 mllib/pom.xml                                   | 10 +++++-----
 pom.xml                                         | 17 ++++++++---------
 repl-bin/pom.xml                                |  6 +++---
 repl/pom.xml                                    | 14 +++++++-------
 streaming/pom.xml                               | 16 ++++++++--------
 .../streaming/receivers/ZeroMQReceiver.scala    |  4 ++--
 tools/pom.xml                                   | 10 +++++-----
 yarn/pom.xml                                    |  8 ++++----
 22 files changed, 114 insertions(+), 107 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/17db6a90/assembly/pom.xml
----------------------------------------------------------------------
diff --git a/assembly/pom.xml b/assembly/pom.xml
index 8103534..c2cda41 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -41,27 +41,27 @@
   <dependencies>
     <dependency>
       <groupId>org.apache.spark</groupId>
-      <artifactId>spark-core_${scala-short.version}</artifactId>
+      <artifactId>spark-core_2.10</artifactId>
       <version>${project.version}</version>
     </dependency>
     <dependency>
       <groupId>org.apache.spark</groupId>
-      <artifactId>spark-bagel_${scala-short.version}</artifactId>
+      <artifactId>spark-bagel_2.10</artifactId>
       <version>${project.version}</version>
     </dependency>
     <dependency>
       <groupId>org.apache.spark</groupId>
-      <artifactId>spark-mllib_${scala-short.version}</artifactId>
+      <artifactId>spark-mllib_2.10</artifactId>
       <version>${project.version}</version>
     </dependency>
     <dependency>
       <groupId>org.apache.spark</groupId>
-      <artifactId>spark-repl_${scala-short.version}</artifactId>
+      <artifactId>spark-repl_2.10</artifactId>
       <version>${project.version}</version>
     </dependency>
     <dependency>
       <groupId>org.apache.spark</groupId>
-      <artifactId>spark-streaming_${scala-short.version}</artifactId>
+      <artifactId>spark-streaming_2.10</artifactId>
       <version>${project.version}</version>
     </dependency>
     <dependency>
@@ -79,7 +79,7 @@
         <artifactId>maven-shade-plugin</artifactId>
         <configuration>
           <shadedArtifactAttached>false</shadedArtifactAttached>
-          <outputFile>${project.build.directory}/scala-${scala-short.version}/${project.artifactId}-${project.version}-hadoop${hadoop.version}.jar</outputFile>
+          <outputFile>${project.build.directory}/scala-2.10/${project.artifactId}-${project.version}-hadoop${hadoop.version}.jar</outputFile>
           <artifactSet>
             <includes>
               <include>*:*</include>
@@ -128,7 +128,7 @@
       <dependencies>
         <dependency>
           <groupId>org.apache.spark</groupId>
-          <artifactId>spark-yarn_${scala-short.version}</artifactId>
+          <artifactId>spark-yarn_2.10</artifactId>
           <version>${project.version}</version>
         </dependency>
       </dependencies>

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/17db6a90/bagel/pom.xml
----------------------------------------------------------------------
diff --git a/bagel/pom.xml b/bagel/pom.xml
index 461e76a..0f550d7 100644
--- a/bagel/pom.xml
+++ b/bagel/pom.xml
@@ -34,7 +34,7 @@
   <dependencies>
     <dependency>
       <groupId>org.apache.spark</groupId>
-      <artifactId>spark-core_${scala-short.version}</artifactId>
+      <artifactId>spark-core_2.10</artifactId>
       <version>${project.version}</version>
     </dependency>
     <dependency>
@@ -43,18 +43,18 @@
     </dependency>
     <dependency>
       <groupId>org.scalatest</groupId>
-      <artifactId>scalatest_${scala-short.version}</artifactId>
+      <artifactId>scalatest_2.10</artifactId>
       <scope>test</scope>
     </dependency>
     <dependency>
       <groupId>org.scalacheck</groupId>
-      <artifactId>scalacheck_${scala-short.version}</artifactId>
+      <artifactId>scalacheck_2.10</artifactId>
       <scope>test</scope>
     </dependency>
   </dependencies>
   <build>
-    <outputDirectory>target/scala-${scala-short.version}/classes</outputDirectory>
-    <testOutputDirectory>target/scala-${scala-short.version}/test-classes</testOutputDirectory>
+    <outputDirectory>target/scala-2.10/classes</outputDirectory>
+    <testOutputDirectory>target/scala-2.10/test-classes</testOutputDirectory>
     <plugins>
       <plugin>
         <groupId>org.scalatest</groupId>

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/17db6a90/core/pom.xml
----------------------------------------------------------------------
diff --git a/core/pom.xml b/core/pom.xml
index af605e1..71bf15c 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -86,7 +86,7 @@
     </dependency>
     <dependency>
       <groupId>com.twitter</groupId>
-      <artifactId>chill_${scala-short.version}</artifactId>
+      <artifactId>chill_2.10</artifactId>
       <version>0.3.1</version>
     </dependency>
     <dependency>
@@ -96,11 +96,11 @@
     </dependency>
     <dependency>
       <groupId>com.typesafe.akka</groupId>
-      <artifactId>akka-remote_${scala-short.version}</artifactId>
+      <artifactId>akka-remote_2.10</artifactId>
     </dependency>
     <dependency>
       <groupId>com.typesafe.akka</groupId>
-      <artifactId>akka-slf4j_${scala-short.version}</artifactId>
+      <artifactId>akka-slf4j_2.10</artifactId>
     </dependency>
     <dependency>
       <groupId>org.scala-lang</groupId>
@@ -108,7 +108,7 @@
     </dependency>
     <dependency>
       <groupId>net.liftweb</groupId>
-      <artifactId>lift-json_${scala-short.version}</artifactId>
+      <artifactId>lift-json_2.10</artifactId>
     </dependency>
     <dependency>
       <groupId>it.unimi.dsi</groupId>
@@ -120,7 +120,7 @@
     </dependency>
     <dependency>
       <groupId>com.github.scala-incubator.io</groupId>
-      <artifactId>scala-io-file_${scala-short.version}</artifactId>
+      <artifactId>scala-io-file_2.10</artifactId>
     </dependency>
     <dependency>
       <groupId>org.apache.mesos</groupId>
@@ -166,12 +166,12 @@
     </dependency>
     <dependency>
       <groupId>org.scalatest</groupId>
-      <artifactId>scalatest_${scala-short.version}</artifactId>
+      <artifactId>scalatest_2.10</artifactId>
       <scope>test</scope>
     </dependency>
     <dependency>
       <groupId>org.scalacheck</groupId>
-      <artifactId>scalacheck_${scala-short.version}</artifactId>
+      <artifactId>scalacheck_2.10</artifactId>
       <scope>test</scope>
     </dependency>
     <dependency>
@@ -191,8 +191,8 @@
     </dependency>
   </dependencies>
   <build>
-    <outputDirectory>target/scala-${scala-short.version}/classes</outputDirectory>
-    <testOutputDirectory>target/scala-${scala-short.version}/test-classes</testOutputDirectory>
+    <outputDirectory>target/scala-2.10/classes</outputDirectory>
+    <testOutputDirectory>target/scala-2.10/test-classes</testOutputDirectory>
     <plugins>
       <plugin>
         <groupId>org.apache.maven.plugins</groupId>

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/17db6a90/core/src/main/scala/org/apache/spark/MapOutputTracker.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/MapOutputTracker.scala b/core/src/main/scala/org/apache/spark/MapOutputTracker.scala
index 88a7f24..d36e1b1 100644
--- a/core/src/main/scala/org/apache/spark/MapOutputTracker.scala
+++ b/core/src/main/scala/org/apache/spark/MapOutputTracker.scala
@@ -72,12 +72,11 @@ private[spark] class MapOutputTracker extends Logging {
   // throw a SparkException if this fails.
   private def askTracker(message: Any): Any = {
     try {
-      val future = if (trackerActor.isLeft ) {
-        trackerActor.left.get.ask(message)(timeout)
-      } else {
-        trackerActor.right.get.ask(message)(timeout)
+      val future = trackerActor match {
+        case Left(a: ActorRef) => a.ask(message)(timeout)
+        case Right(b: ActorSelection) => b.ask(message)(timeout)
       }
-      return Await.result(future, timeout)
+      Await.result(future, timeout)
     } catch {
       case e: Exception =>
         throw new SparkException("Error communicating with MapOutputTracker", e)
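
The same Either dispatch appears in BlockManagerMaster below. A self-contained
sketch of the pattern (assumes Akka 2.2 on the classpath; names are
illustrative): matching on the Either replaces the unsafe left.get/right.get
calls with exhaustive, type-checked branches.

    import scala.concurrent.Future
    import akka.actor.{ActorRef, ActorSelection}
    import akka.pattern.ask
    import akka.util.Timeout

    def askEither(target: Either[ActorRef, ActorSelection], message: Any)
                 (implicit timeout: Timeout): Future[Any] =
      target match {
        case Left(ref)        => ref.ask(message)(timeout)        // classic ActorRef ask
        case Right(selection) => selection.ask(message)(timeout)  // ActorSelection ask (Akka 2.2+)
      }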

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/17db6a90/core/src/main/scala/org/apache/spark/Partitioner.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/Partitioner.scala b/core/src/main/scala/org/apache/spark/Partitioner.scala
index 62b608c..bcec41c 100644
--- a/core/src/main/scala/org/apache/spark/Partitioner.scala
+++ b/core/src/main/scala/org/apache/spark/Partitioner.scala
@@ -17,11 +17,11 @@
 
 package org.apache.spark
 
-import org.apache.spark.util.Utils
-import org.apache.spark.rdd.RDD
-
 import scala.reflect.ClassTag
 
+import org.apache.spark.rdd.RDD
+import org.apache.spark.util.Utils
+
 /**
  * An object that defines how the elements in a key-value pair RDD are partitioned by key.
  * Maps each key to a partition ID, from 0 to `numPartitions - 1`.

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/17db6a90/core/src/main/scala/org/apache/spark/deploy/worker/ui/WorkerWebUI.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/ui/WorkerWebUI.scala b/core/src/main/scala/org/apache/spark/deploy/worker/ui/WorkerWebUI.scala
index a38e32b..6c18a3c 100644
--- a/core/src/main/scala/org/apache/spark/deploy/worker/ui/WorkerWebUI.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/worker/ui/WorkerWebUI.scala
@@ -17,23 +17,19 @@
 
 package org.apache.spark.deploy.worker.ui
 
-import akka.actor.ActorRef
-import akka.util.Timeout
+import java.io.File
 
 import scala.concurrent.duration._
 
-import java.io.{FileInputStream, File}
-
+import akka.util.Timeout
 import javax.servlet.http.HttpServletRequest
 
-import org.eclipse.jetty.server.{Handler, Server}
-
+import org.apache.spark.Logging
 import org.apache.spark.deploy.worker.Worker
-import org.apache.spark.{Logging}
-import org.apache.spark.ui.JettyUtils
+import org.apache.spark.ui.{JettyUtils, UIUtils}
 import org.apache.spark.ui.JettyUtils._
-import org.apache.spark.ui.UIUtils
 import org.apache.spark.util.Utils
+import org.eclipse.jetty.server.{Handler, Server}
 
 /**
  * Web UI server for the standalone worker.

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/17db6a90/core/src/main/scala/org/apache/spark/rdd/CheckpointRDD.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/rdd/CheckpointRDD.scala b/core/src/main/scala/org/apache/spark/rdd/CheckpointRDD.scala
index 99ea6e8..a712ef1 100644
--- a/core/src/main/scala/org/apache/spark/rdd/CheckpointRDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/CheckpointRDD.scala
@@ -17,16 +17,13 @@
 
 package org.apache.spark.rdd
 
+import java.io.IOException
+
 import scala.reflect.ClassTag
+
+import org.apache.hadoop.fs.Path
 import org.apache.spark._
 import org.apache.spark.deploy.SparkHadoopUtil
-import org.apache.hadoop.mapred.{FileInputFormat, SequenceFileInputFormat, JobConf, Reporter}
-import org.apache.hadoop.conf.Configuration
-import org.apache.hadoop.io.{NullWritable, BytesWritable}
-import org.apache.hadoop.util.ReflectionUtils
-import org.apache.hadoop.fs.Path
-import java.io.{File, IOException, EOFException}
-import java.text.NumberFormat
 
 private[spark] class CheckpointRDDPartition(val index: Int) extends Partition {}
 

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/17db6a90/core/src/main/scala/org/apache/spark/rdd/JdbcRDD.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/rdd/JdbcRDD.scala b/core/src/main/scala/org/apache/spark/rdd/JdbcRDD.scala
index e72f86f..8df8718 100644
--- a/core/src/main/scala/org/apache/spark/rdd/JdbcRDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/JdbcRDD.scala
@@ -20,6 +20,7 @@ package org.apache.spark.rdd
 import java.sql.{Connection, ResultSet}
 
 import scala.reflect.ClassTag
+
 import org.apache.spark.{Logging, Partition, SparkContext, TaskContext}
 import org.apache.spark.util.NextIterator
 

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/17db6a90/core/src/main/scala/org/apache/spark/rdd/MappedRDD.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/rdd/MappedRDD.scala b/core/src/main/scala/org/apache/spark/rdd/MappedRDD.scala
index eb3b199..8d7c288 100644
--- a/core/src/main/scala/org/apache/spark/rdd/MappedRDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/MappedRDD.scala
@@ -17,9 +17,10 @@
 
 package org.apache.spark.rdd
 
-import org.apache.spark.{Partition, TaskContext}
 import scala.reflect.ClassTag
 
+import org.apache.spark.{Partition, TaskContext}
+
 private[spark]
 class MappedRDD[U: ClassTag, T: ClassTag](prev: RDD[T], f: T => U)
   extends RDD[U](prev) {

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/17db6a90/core/src/main/scala/org/apache/spark/rdd/OrderedRDDFunctions.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/rdd/OrderedRDDFunctions.scala b/core/src/main/scala/org/apache/spark/rdd/OrderedRDDFunctions.scala
index 4a46584..d5691f2 100644
--- a/core/src/main/scala/org/apache/spark/rdd/OrderedRDDFunctions.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/OrderedRDDFunctions.scala
@@ -17,9 +17,10 @@
 
 package org.apache.spark.rdd
 
-import org.apache.spark.{RangePartitioner, Logging}
 import scala.reflect.ClassTag
 
+import org.apache.spark.{Logging, RangePartitioner}
+
 /**
  * Extra functions available on RDDs of (key, value) pairs where the key is sortable through
  * an implicit conversion. Import `org.apache.spark.SparkContext._` at the top of your program to

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/17db6a90/core/src/main/scala/org/apache/spark/rdd/ShuffledRDD.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/rdd/ShuffledRDD.scala b/core/src/main/scala/org/apache/spark/rdd/ShuffledRDD.scala
index 1d109a2..3682c84 100644
--- a/core/src/main/scala/org/apache/spark/rdd/ShuffledRDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/ShuffledRDD.scala
@@ -17,9 +17,10 @@
 
 package org.apache.spark.rdd
 
-import org.apache.spark.{Dependency, Partitioner, SparkEnv, ShuffleDependency, Partition, TaskContext}
 import scala.reflect.ClassTag
 
+import org.apache.spark.{Dependency, Partition, Partitioner, ShuffleDependency,
+  SparkEnv, TaskContext}
 
 private[spark] class ShuffledRDDPartition(val idx: Int) extends Partition {
   override val index = idx

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/17db6a90/core/src/main/scala/org/apache/spark/storage/BlockManagerMaster.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManagerMaster.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerMaster.scala
index e5de16f..e05b842 100644
--- a/core/src/main/scala/org/apache/spark/storage/BlockManagerMaster.scala
+++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerMaster.scala
@@ -157,10 +157,9 @@ private[spark] class BlockManagerMaster(var driverActor : Either[ActorRef, Actor
     while (attempts < AKKA_RETRY_ATTEMPTS) {
       attempts += 1
       try {
-        val future = if (driverActor.isLeft ) {
-          driverActor.left.get.ask(message)(timeout)
-        } else {
-          driverActor.right.get.ask(message)(timeout)
+        val future = driverActor match {
+          case Left(a: ActorRef) => a.ask(message)(timeout)
+          case Right(b: ActorSelection) => b.ask(message)(timeout)
         }
         val result = Await.result(future, timeout)
         if (result == null) {

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/17db6a90/core/src/main/scala/org/apache/spark/util/IndestructibleActorSystem.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/util/IndestructibleActorSystem.scala b/core/src/main/scala/org/apache/spark/util/IndestructibleActorSystem.scala
index 6951986..bf71882 100644
--- a/core/src/main/scala/org/apache/spark/util/IndestructibleActorSystem.scala
+++ b/core/src/main/scala/org/apache/spark/util/IndestructibleActorSystem.scala
@@ -1,5 +1,18 @@
-/**
- *  Copyright (C) 2009-2013 Typesafe Inc. <http://www.typesafe.com>
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
  */
 
 // Must be in akka.actor package as ActorSystemImpl is protected[akka].

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/17db6a90/examples/pom.xml
----------------------------------------------------------------------
diff --git a/examples/pom.xml b/examples/pom.xml
index 464ad82..97f6dfe 100644
--- a/examples/pom.xml
+++ b/examples/pom.xml
@@ -49,25 +49,25 @@
   <dependencies>
     <dependency>
       <groupId>org.apache.spark</groupId>
-      <artifactId>spark-core_${scala-short.version}</artifactId>
+      <artifactId>spark-core_2.10</artifactId>
       <version>${project.version}</version>
       <scope>provided</scope>
     </dependency>
     <dependency>
       <groupId>org.apache.spark</groupId>
-      <artifactId>spark-streaming_${scala-short.version}</artifactId>
+      <artifactId>spark-streaming_2.10</artifactId>
       <version>${project.version}</version>
       <scope>provided</scope>
     </dependency>
     <dependency>
       <groupId>org.apache.spark</groupId>
-      <artifactId>spark-mllib_${scala-short.version}</artifactId>
+      <artifactId>spark-mllib_2.10</artifactId>
       <version>${project.version}</version>
       <scope>provided</scope>
     </dependency>
     <dependency>
       <groupId>org.apache.spark</groupId>
-      <artifactId>spark-bagel_${scala-short.version}</artifactId>
+      <artifactId>spark-bagel_2.10</artifactId>
       <version>${project.version}</version>
       <scope>provided</scope>
     </dependency>
@@ -88,7 +88,7 @@
     </dependency>
     <dependency>
       <groupId>org.apache.kafka</groupId>
-      <artifactId>kafka_2.9.2</artifactId>
+      <artifactId>kafka_2.10</artifactId>
       <version>0.8.0-beta1</version>
       <exclusions>
         <exclusion>
@@ -107,17 +107,17 @@
     </dependency>
     <dependency>
       <groupId>com.twitter</groupId>
-      <artifactId>algebird-core_${scala-short.version}</artifactId>
+      <artifactId>algebird-core_2.10</artifactId>
       <version>0.1.11</version>
     </dependency>
     <dependency>
       <groupId>org.scalatest</groupId>
-      <artifactId>scalatest_${scala-short.version}</artifactId>
+      <artifactId>scalatest_2.10</artifactId>
       <scope>test</scope>
     </dependency>
     <dependency>
       <groupId>org.scalacheck</groupId>
-      <artifactId>scalacheck_${scala-short.version}</artifactId>
+      <artifactId>scalacheck_2.10</artifactId>
       <scope>test</scope>
     </dependency>
     <dependency>
@@ -166,8 +166,8 @@
   </dependencies>
 
   <build>
-    <outputDirectory>target/scala-${scala-short.version}/classes</outputDirectory>
-    <testOutputDirectory>target/scala-${scala-short.version}/test-classes</testOutputDirectory>
+    <outputDirectory>target/scala-2.10/classes</outputDirectory>
+    <testOutputDirectory>target/scala-2.10/test-classes</testOutputDirectory>
     <plugins>
       <plugin>
         <groupId>org.apache.maven.plugins</groupId>

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/17db6a90/mllib/pom.xml
----------------------------------------------------------------------
diff --git a/mllib/pom.xml b/mllib/pom.xml
index fce5b19..228f8c0 100644
--- a/mllib/pom.xml
+++ b/mllib/pom.xml
@@ -34,7 +34,7 @@
   <dependencies>
     <dependency>
       <groupId>org.apache.spark</groupId>
-      <artifactId>spark-core_${scala-short.version}</artifactId>
+      <artifactId>spark-core_2.10</artifactId>
       <version>${project.version}</version>
     </dependency>
     <dependency>
@@ -48,12 +48,12 @@
     </dependency>
     <dependency>
       <groupId>org.scalatest</groupId>
-      <artifactId>scalatest_${scala-short.version}</artifactId>
+      <artifactId>scalatest_2.10</artifactId>
       <scope>test</scope>
     </dependency>
     <dependency>
       <groupId>org.scalacheck</groupId>
-      <artifactId>scalacheck_${scala-short.version}</artifactId>
+      <artifactId>scalacheck_2.10</artifactId>
       <scope>test</scope>
     </dependency>
     <dependency>
@@ -63,8 +63,8 @@
     </dependency>
   </dependencies>
   <build>
-    <outputDirectory>target/scala-${scala-short.version}/classes</outputDirectory>
-    <testOutputDirectory>target/scala-${scala-short.version}/test-classes</testOutputDirectory>
+    <outputDirectory>target/scala-2.10/classes</outputDirectory>
+    <testOutputDirectory>target/scala-2.10/test-classes</testOutputDirectory>
     <plugins>
       <plugin>
         <groupId>org.scalatest</groupId>

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/17db6a90/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 4c11459..979fd0c 100644
--- a/pom.xml
+++ b/pom.xml
@@ -101,7 +101,6 @@
 
     <java.version>1.6</java.version>
 
-    <scala-short.version>2.10</scala-short.version>
     <scala.version>2.10.3</scala.version>
     <mesos.version>0.13.0</mesos.version>
     <akka.version>2.2.3</akka.version>
@@ -205,7 +204,7 @@
       </dependency>
       <dependency>
         <groupId>com.twitter</groupId>
-        <artifactId>chill_${scala-short.version}</artifactId>
+        <artifactId>chill_2.10</artifactId>
         <version>0.3.1</version>
       </dependency>
       <dependency>
@@ -215,7 +214,7 @@
       </dependency>
       <dependency>
         <groupId>com.typesafe.akka</groupId>
-        <artifactId>akka-actor_${scala-short.version}</artifactId>
+        <artifactId>akka-actor_2.10</artifactId>
         <version>${akka.version}</version>
         <exclusions>
           <exclusion>
@@ -226,7 +225,7 @@
       </dependency>
       <dependency>
         <groupId>com.typesafe.akka</groupId>
-        <artifactId>akka-remote_${scala-short.version}</artifactId>
+        <artifactId>akka-remote_2.10</artifactId>
         <version>${akka.version}</version>
         <exclusions>
           <exclusion>
@@ -237,7 +236,7 @@
       </dependency>
       <dependency>
         <groupId>com.typesafe.akka</groupId>
-        <artifactId>akka-slf4j_${scala-short.version}</artifactId>
+        <artifactId>akka-slf4j_2.10</artifactId>
         <version>${akka.version}</version>
         <exclusions>
           <exclusion>
@@ -258,7 +257,7 @@
       </dependency>
       <dependency>
         <groupId>com.github.scala-incubator.io</groupId>
-        <artifactId>scala-io-file_${scala-short.version}</artifactId>
+        <artifactId>scala-io-file_2.10</artifactId>
         <version>0.4.1</version>
       </dependency>
       <dependency>
@@ -279,7 +278,7 @@
       </dependency>
       <dependency>
         <groupId>net.liftweb</groupId>
-        <artifactId>lift-json_${scala-short.version}</artifactId>
+        <artifactId>lift-json_2.10</artifactId>
         <version>2.5.1</version>
         <exclusions>
           <exclusion>
@@ -335,7 +334,7 @@
       </dependency>
       <dependency>
         <groupId>org.scalatest</groupId>
-        <artifactId>scalatest_${scala-short.version}</artifactId>
+        <artifactId>scalatest_2.10</artifactId>
         <version>1.9.1</version>
         <scope>test</scope>
       </dependency>
@@ -358,7 +357,7 @@
       </dependency>
       <dependency>
         <groupId>org.scalacheck</groupId>
-        <artifactId>scalacheck_${scala-short.version}</artifactId>
+        <artifactId>scalacheck_2.10</artifactId>
         <version>1.10.0</version>
         <scope>test</scope>
       </dependency>

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/17db6a90/repl-bin/pom.xml
----------------------------------------------------------------------
diff --git a/repl-bin/pom.xml b/repl-bin/pom.xml
index f8a17d9..c2a4efa 100644
--- a/repl-bin/pom.xml
+++ b/repl-bin/pom.xml
@@ -40,18 +40,18 @@
   <dependencies>
     <dependency>
       <groupId>org.apache.spark</groupId>
-      <artifactId>spark-core_${scala-short.version}</artifactId>
+      <artifactId>spark-core_2.10</artifactId>
       <version>${project.version}</version>
     </dependency>
     <dependency>
       <groupId>org.apache.spark</groupId>
-      <artifactId>spark-bagel_${scala-short.version}</artifactId>
+      <artifactId>spark-bagel_2.10</artifactId>
       <version>${project.version}</version>
       <scope>runtime</scope>
     </dependency>
     <dependency>
       <groupId>org.apache.spark</groupId>
-      <artifactId>spark-repl_${scala-short.version}</artifactId>
+      <artifactId>spark-repl_2.10</artifactId>
       <version>${project.version}</version>
       <scope>runtime</scope>
     </dependency>

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/17db6a90/repl/pom.xml
----------------------------------------------------------------------
diff --git a/repl/pom.xml b/repl/pom.xml
index 2f27e76..bf06d73 100644
--- a/repl/pom.xml
+++ b/repl/pom.xml
@@ -39,18 +39,18 @@
   <dependencies>
     <dependency>
       <groupId>org.apache.spark</groupId>
-      <artifactId>spark-core_${scala-short.version}</artifactId>
+      <artifactId>spark-core_2.10</artifactId>
       <version>${project.version}</version>
     </dependency>
     <dependency>
       <groupId>org.apache.spark</groupId>
-      <artifactId>spark-bagel_${scala-short.version}</artifactId>
+      <artifactId>spark-bagel_2.10</artifactId>
       <version>${project.version}</version>
       <scope>runtime</scope>
     </dependency>
     <dependency>
       <groupId>org.apache.spark</groupId>
-      <artifactId>spark-mllib_${scala-short.version}</artifactId>
+      <artifactId>spark-mllib_2.10</artifactId>
       <version>${project.version}</version>
       <scope>runtime</scope>
     </dependency>
@@ -78,18 +78,18 @@
     </dependency>
     <dependency>
       <groupId>org.scalatest</groupId>
-      <artifactId>scalatest_${scala-short.version}</artifactId>
+      <artifactId>scalatest_2.10</artifactId>
       <scope>test</scope>
     </dependency>
     <dependency>
       <groupId>org.scalacheck</groupId>
-      <artifactId>scalacheck_${scala-short.version}</artifactId>
+      <artifactId>scalacheck_2.10</artifactId>
       <scope>test</scope>
     </dependency>
   </dependencies>
   <build>
-    <outputDirectory>target/scala-${scala-short.version}/classes</outputDirectory>
-    <testOutputDirectory>target/scala-${scala-short.version}/test-classes</testOutputDirectory>
+    <outputDirectory>target/scala-2.10/classes</outputDirectory>
+    <testOutputDirectory>target/scala-2.10/test-classes</testOutputDirectory>
     <plugins>
       <plugin>
         <groupId>org.apache.maven.plugins</groupId>

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/17db6a90/streaming/pom.xml
----------------------------------------------------------------------
diff --git a/streaming/pom.xml b/streaming/pom.xml
index ff95591..298bc83 100644
--- a/streaming/pom.xml
+++ b/streaming/pom.xml
@@ -48,7 +48,7 @@
   <dependencies>
     <dependency>
       <groupId>org.apache.spark</groupId>
-      <artifactId>spark-core_${scala-short.version}</artifactId>
+      <artifactId>spark-core_2.10</artifactId>
       <version>${project.version}</version>
     </dependency>
     <dependency>
@@ -61,8 +61,8 @@
       <version>1.9.11</version>
     </dependency>
     <dependency>
-      <groupId>org.apache.kafka</groupId>
-      <artifactId>kafka_2.9.2</artifactId>
+      <groupId>com.sksamuel.kafka</groupId>
+      <artifactId>kafka_2.10</artifactId>
       <version>0.8.0-beta1</version>
       <exclusions>
         <exclusion>
@@ -111,7 +111,7 @@
     </dependency>
     <dependency>
       <groupId>com.typesafe.akka</groupId>
-      <artifactId>akka-zeromq_${scala-short.version}</artifactId>
+      <artifactId>akka-zeromq_2.10</artifactId>
       <version>${akka.version}</version>
       <exclusions>
         <exclusion>
@@ -122,12 +122,12 @@
     </dependency>
     <dependency>
       <groupId>org.scalatest</groupId>
-      <artifactId>scalatest_${scala-short.version}</artifactId>
+      <artifactId>scalatest_2.10</artifactId>
       <scope>test</scope>
     </dependency>
     <dependency>
       <groupId>org.scalacheck</groupId>
-      <artifactId>scalacheck_${scala-short.version}</artifactId>
+      <artifactId>scalacheck_2.10</artifactId>
       <scope>test</scope>
     </dependency>
     <dependency>
@@ -151,8 +151,8 @@
     </dependency>
   </dependencies>
   <build>
-    <outputDirectory>target/scala-${scala-short.version}/classes</outputDirectory>
-    <testOutputDirectory>target/scala-${scala-short.version}/test-classes</testOutputDirectory>
+    <outputDirectory>target/scala-2.10/classes</outputDirectory>
+    <testOutputDirectory>target/scala-2.10/test-classes</testOutputDirectory>
     <plugins>
       <plugin>
         <groupId>org.scalatest</groupId>

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/17db6a90/streaming/src/main/scala/org/apache/spark/streaming/receivers/ZeroMQReceiver.scala
----------------------------------------------------------------------
diff --git a/streaming/src/main/scala/org/apache/spark/streaming/receivers/ZeroMQReceiver.scala b/streaming/src/main/scala/org/apache/spark/streaming/receivers/ZeroMQReceiver.scala
index ce8c56f..f164d51 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/receivers/ZeroMQReceiver.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/receivers/ZeroMQReceiver.scala
@@ -17,14 +17,14 @@
 
 package org.apache.spark.streaming.receivers
 
+import scala.reflect.ClassTag
+
 import akka.actor.Actor
 import akka.util.ByteString
 import akka.zeromq._
 
 import org.apache.spark.Logging
 
-import scala.reflect.ClassTag
-
 /**
  * A receiver to subscribe to ZeroMQ stream.
  */

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/17db6a90/tools/pom.xml
----------------------------------------------------------------------
diff --git a/tools/pom.xml b/tools/pom.xml
index 353d201..2bad494 100644
--- a/tools/pom.xml
+++ b/tools/pom.xml
@@ -33,24 +33,24 @@
   <dependencies>
     <dependency>
       <groupId>org.apache.spark</groupId>
-      <artifactId>spark-core_${scala-short.version}</artifactId>
+      <artifactId>spark-core_2.10</artifactId>
       <version>${project.version}</version>
     </dependency>
     <dependency>
       <groupId>org.apache.spark</groupId>
-      <artifactId>spark-streaming_${scala-short.version}</artifactId>
+      <artifactId>spark-streaming_2.10</artifactId>
       <version>${project.version}</version>
     </dependency>
     <dependency>
       <groupId>org.scalatest</groupId>
-      <artifactId>scalatest_${scala-short.version}</artifactId>
+      <artifactId>scalatest_2.10</artifactId>
       <scope>test</scope>
     </dependency>
   </dependencies>
 
   <build>
-    <outputDirectory>target/scala-${scala-short.version}/classes</outputDirectory>
-    <testOutputDirectory>target/scala-${scala-short.version}/test-classes</testOutputDirectory>
+    <outputDirectory>target/scala-2.10/classes</outputDirectory>
+    <testOutputDirectory>target/scala-2.10/test-classes</testOutputDirectory>
     <plugins>
       <plugin>
         <groupId>org.apache.maven.plugins</groupId>

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/17db6a90/yarn/pom.xml
----------------------------------------------------------------------
diff --git a/yarn/pom.xml b/yarn/pom.xml
index 5cf8157..d18ac37 100644
--- a/yarn/pom.xml
+++ b/yarn/pom.xml
@@ -33,7 +33,7 @@
   <dependencies>
     <dependency>
       <groupId>org.apache.spark</groupId>
-      <artifactId>spark-core_${scala-short.version}</artifactId>
+      <artifactId>spark-core_2.10</artifactId>
       <version>${project.version}</version>
     </dependency>
     <dependency>
@@ -63,7 +63,7 @@
     </dependency>
     <dependency>
       <groupId>org.scalatest</groupId>
-      <artifactId>scalatest_2.9.3</artifactId>
+      <artifactId>scalatest_2.10</artifactId>
       <scope>test</scope>
     </dependency>
     <dependency>
@@ -74,8 +74,8 @@
   </dependencies>
 
   <build>
-    <outputDirectory>target/scala-${scala-short.version}/classes</outputDirectory>
-    <testOutputDirectory>target/scala-${scala-short.version}/test-classes</testOutputDirectory>
+    <outputDirectory>target/scala-2.10/classes</outputDirectory>
+    <testOutputDirectory>target/scala-2.10/test-classes</testOutputDirectory>
     <plugins>
       <plugin>
         <groupId>org.apache.maven.plugins</groupId>


[40/50] git commit: fixed yarn build

Posted by pw...@apache.org.
fixed yarn build


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/c1201f47
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/c1201f47
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/c1201f47

Branch: refs/heads/scala-2.10
Commit: c1201f47e0d44e92da42adb23d27f60d9d494642
Parents: 7ad6921
Author: Prashant Sharma <pr...@imaginea.com>
Authored: Mon Dec 9 12:55:19 2013 +0530
Committer: Prashant Sharma <pr...@imaginea.com>
Committed: Mon Dec 9 13:00:50 2013 +0530

----------------------------------------------------------------------
 .../apache/spark/deploy/yarn/WorkerLauncher.scala    | 15 ++++++---------
 .../spark/deploy/yarn/YarnAllocationHandler.scala    |  4 ++--
 2 files changed, 8 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/c1201f47/yarn/src/main/scala/org/apache/spark/deploy/yarn/WorkerLauncher.scala
----------------------------------------------------------------------
diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/WorkerLauncher.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/WorkerLauncher.scala
index b67e068..6903884 100644
--- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/WorkerLauncher.scala
+++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/WorkerLauncher.scala
@@ -27,10 +27,8 @@ import org.apache.hadoop.yarn.conf.YarnConfiguration
 import org.apache.hadoop.yarn.ipc.YarnRPC
 import org.apache.hadoop.yarn.util.{ConverterUtils, Records}
 import akka.actor._
-import akka.remote.{RemoteClientShutdown, RemoteClientDisconnected, RemoteClientLifeCycleEvent}
-import akka.remote.RemoteClientShutdown
+import akka.remote._
 import akka.actor.Terminated
-import akka.remote.RemoteClientDisconnected
 import org.apache.spark.{SparkContext, Logging}
 import org.apache.spark.util.{Utils, AkkaUtils}
 import org.apache.spark.scheduler.cluster.CoarseGrainedSchedulerBackend
@@ -55,19 +53,18 @@ class WorkerLauncher(args: ApplicationMasterArguments, conf: Configuration) exte
  // This actor just works as a monitor to watch the Driver Actor.
   class MonitorActor(driverUrl: String) extends Actor {
 
-    var driver: ActorRef = null
+    var driver: ActorSelection = null
 
     override def preStart() {
       logInfo("Listen to driver: " + driverUrl)
-      driver = context.actorFor(driverUrl)
+      driver = context.actorSelection(driverUrl)
       driver ! "hello"
-      context.system.eventStream.subscribe(self, classOf[RemoteClientLifeCycleEvent])
-      context.watch(driver) // Doesn't work with remote actors, but useful for testing
+      context.system.eventStream.subscribe(self, classOf[RemotingLifecycleEvent])
     }
 
     override def receive = {
-      case Terminated(_) | RemoteClientDisconnected(_, _) | RemoteClientShutdown(_, _) =>
-        logInfo("Driver terminated or disconnected! Shutting down.")
+      case x: DisassociatedEvent =>
+        logInfo(s"Driver terminated or disconnected! Shutting down. $x")
         driverClosed = true
     }
   }
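
A minimal sketch of this monitoring pattern (hypothetical actor; the driver URL
is illustrative): resolve the remote driver via actorSelection, subscribe to
remoting lifecycle events, and treat a DisassociatedEvent as the driver going
away.

    import akka.actor._
    import akka.remote.{DisassociatedEvent, RemotingLifecycleEvent}

    class DriverMonitor(driverUrl: String) extends Actor {
      override def preStart() {
        val driver = context.actorSelection(driverUrl)
        driver ! "hello"
        // DisassociatedEvent extends RemotingLifecycleEvent, so this
        // subscription delivers disconnect notifications to this actor.
        context.system.eventStream.subscribe(self, classOf[RemotingLifecycleEvent])
      }
      def receive = {
        case e: DisassociatedEvent => println("Driver disconnected: " + e)
      }
    }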

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/c1201f47/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocationHandler.scala
----------------------------------------------------------------------
diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocationHandler.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocationHandler.scala
index baa030b..a6ce1b6 100644
--- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocationHandler.scala
+++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocationHandler.scala
@@ -34,7 +34,7 @@ import org.apache.hadoop.conf.Configuration
 import java.util.{Collections, Set => JSet}
 import java.lang.{Boolean => JBoolean}
 
-object AllocationType extends Enumeration ("HOST", "RACK", "ANY") {
+object AllocationType extends Enumeration {
   type AllocationType = Value
   val HOST, RACK, ANY = Value
 }
@@ -370,7 +370,7 @@ private[yarn] class YarnAllocationHandler(val conf: Configuration, val resourceM
         createResourceRequest(AllocationType.ANY, null, numWorkers, YarnAllocationHandler.PRIORITY)
 
       val containerRequests: ArrayBuffer[ResourceRequest] =
-        new ArrayBuffer[ResourceRequest](hostContainerRequests.size() + rackContainerRequests.size() + 1)
+        new ArrayBuffer[ResourceRequest](hostContainerRequests.size + rackContainerRequests.size + 1)
 
       containerRequests ++= hostContainerRequests
       containerRequests ++= rackContainerRequests
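
The AllocationType hunk above reflects a Scala 2.10 API removal: the deprecated Enumeration(names: String*) constructor is gone, and Value names are now derived from the val identifiers. A standalone sketch of the resulting idiom:

    object AllocationType extends Enumeration {
      type AllocationType = Value
      val HOST, RACK, ANY = Value  // names are taken from the identifiers
    }

    object EnumDemo extends App {
      println(AllocationType.values.mkString(", "))                   // HOST, RACK, ANY
      println(AllocationType.withName("RACK") == AllocationType.RACK) // true
    }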


[24/50] git commit: Minor doc fixes and updating README

Posted by pw...@apache.org.
Minor doc fixes and updating README


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/bb6e25c6
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/bb6e25c6
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/bb6e25c6

Branch: refs/heads/scala-2.10
Commit: bb6e25c663a0fa96552994bcdda2049e9b621db7
Parents: 241336a
Author: Patrick Wendell <pw...@gmail.com>
Authored: Fri Dec 6 17:41:27 2013 -0800
Committer: Patrick Wendell <pw...@gmail.com>
Committed: Fri Dec 6 17:42:28 2013 -0800

----------------------------------------------------------------------
 README.md                   | 7 ++++++-
 docs/building-with-maven.md | 4 +++-
 docs/index.md               | 2 +-
 3 files changed, 10 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/bb6e25c6/README.md
----------------------------------------------------------------------
diff --git a/README.md b/README.md
index 456b806..8c7853e 100644
--- a/README.md
+++ b/README.md
@@ -55,7 +55,7 @@ versions without YARN, use:
     # Cloudera CDH 4.2.0 with MapReduce v1
     $ SPARK_HADOOP_VERSION=2.0.0-mr1-cdh4.2.0 sbt/sbt assembly
 
-For Apache Hadoop 2.x, 0.23.x, Cloudera CDH MRv2, and other Hadoop versions
+For Apache Hadoop 2.0.X, 2.1.X, 0.23.x, Cloudera CDH MRv2, and other Hadoop versions
 with YARN, also set `SPARK_YARN=true`:
 
     # Apache Hadoop 2.0.5-alpha
@@ -64,6 +64,11 @@ with YARN, also set `SPARK_YARN=true`:
     # Cloudera CDH 4.2.0 with MapReduce v2
     $ SPARK_HADOOP_VERSION=2.0.0-cdh4.2.0 SPARK_YARN=true sbt/sbt assembly
 
+When building for Hadoop 2.2.X and newer, you'll need to include the additional `new-yarn` profile:
+
+    # Apache Hadoop 2.2.X and newer
+    $ mvn -Dyarn.version=2.2.0 -Dhadoop.version=2.2.0 -Pnew-yarn
+
 For convenience, these variables may also be set through the `conf/spark-env.sh` file
 described below.
 

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/bb6e25c6/docs/building-with-maven.md
----------------------------------------------------------------------
diff --git a/docs/building-with-maven.md b/docs/building-with-maven.md
index a508786..c709001 100644
--- a/docs/building-with-maven.md
+++ b/docs/building-with-maven.md
@@ -46,7 +46,9 @@ For Apache Hadoop 2.x, 0.23.x, Cloudera CDH MRv2, and other Hadoop versions with
     $ mvn -Phadoop2-yarn -Dhadoop.version=2.0.0-cdh4.2.0 -Dyarn.version=2.0.0-chd4.2.0 -DskipTests clean package
 
 Hadoop versions 2.2.x and newer can be built by setting the ```new-yarn``` and the ```yarn.version``` as follows:
-       mvn -Dyarn.version=2.2.0 -Dhadoop.version=2.2.0 -Pnew-yarn
+ 
+    # Apache Hadoop 2.2.X and newer
+    $ mvn -Dyarn.version=2.2.0 -Dhadoop.version=2.2.0 -Pnew-yarn
 
 The build process handles Hadoop 2.2.x as a special case that uses the directory ```new-yarn```, which supports the new YARN API. Furthermore, for this version, the build depends on artifacts published by the spark-project to enable Akka 2.0.5 to work with protobuf 2.5. 
 

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/bb6e25c6/docs/index.md
----------------------------------------------------------------------
diff --git a/docs/index.md b/docs/index.md
index bbb2733..45616f7 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -65,7 +65,7 @@ In addition, if you wish to run Spark on [YARN](running-on-yarn.md), set
 
 Note that on Windows, you need to set the environment variables on separate lines, e.g., `set SPARK_HADOOP_VERSION=1.2.1`.
 
-For this version of Spark (0.8.1) Hadoop 2.2.x (or newer) users will have to build Spark and publish it locally. See [Launching Spark on YARN](running-on-yarn.md). This is needed because Hadoop 2.2 has non backwards compatible API changes.
+For this version of Spark (0.8.1) Hadoop 2.2.x (or newer) users will have to build Spark and publish it locally. See [Launching Spark on YARN](running-on-yarn.html). This is needed because Hadoop 2.2 has non backwards compatible API changes.
 
 # Where to Go from Here
 


[19/50] git commit: Merge pull request #233 from hsaputra/changecontexttobackend

Posted by pw...@apache.org.
Merge pull request #233 from hsaputra/changecontexttobackend

Change the name of input argument in ClusterScheduler#initialize from context to backend.

The SchedulerBackend used to be called ClusterSchedulerContext, so this just makes a small
change to the input param of ClusterScheduler#initialize to reflect that.


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/bfa68609
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/bfa68609
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/bfa68609

Branch: refs/heads/scala-2.10
Commit: bfa68609d98ca3d7c22c233b3db9e39df6f1c170
Parents: 3fb302c 1cb259c
Author: Matei Zaharia <ma...@eecs.berkeley.edu>
Authored: Fri Dec 6 11:04:03 2013 -0800
Committer: Matei Zaharia <ma...@eecs.berkeley.edu>
Committed: Fri Dec 6 11:04:03 2013 -0800

----------------------------------------------------------------------
 .../org/apache/spark/scheduler/cluster/ClusterScheduler.scala    | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
----------------------------------------------------------------------



[20/50] git commit: Merge pull request #190 from markhamstra/Stages4Jobs

Posted by pw...@apache.org.
Merge pull request #190 from markhamstra/Stages4Jobs

stageId <--> jobId mapping in DAGScheduler

Okay, I think this one is ready to go -- or at least it's ready for review and discussion.  It's a carry-over of https://github.com/mesos/spark/pull/842 with updates for the newer job cancellation functionality.  The prior discussion still applies.  I've actually changed the job cancellation flow a bit: Instead of ``cancelTasks`` going to the TaskScheduler and then ``taskSetFailed`` coming back to the DAGScheduler (resulting in ``abortStage`` there), the DAGScheduler now takes care of figuring out which stages should be cancelled, tells the TaskScheduler to cancel tasks for those stages, then does the cleanup within the DAGScheduler directly without the need for any further prompting by the TaskScheduler.

I know of three outstanding issues, each of which can and should, I believe, be handled in follow-up pull requests:

1) https://spark-project.atlassian.net/browse/SPARK-960
2) JobLogger should be re-factored to eliminate duplication
3) Related to 2), the WebUI should also become a consumer of the DAGScheduler's new understanding of the relationship between jobs and stages so that it can display progress indication and the like grouped by job.  Right now, some of this information is just being sent out as part of ``SparkListenerJobStart`` messages, but more or different job <--> stage information may need to be exported from the DAGScheduler to meet listeners' needs.

Except for the eventQueue -> Actor commit, the rest can be cherry-picked almost cleanly into branch-0.8.  A little merging is needed in MapOutputTracker and the DAGScheduler.  Merged versions of those files are in https://github.com/markhamstra/incubator-spark/tree/aba2b40ce04ee9b7b9ea260abb6f09e050142d43

Note that between the recent Actor change in the DAGScheduler and the cleaning up of DAGScheduler data structures on job completion in this PR, some races have been introduced into the DAGSchedulerSuite.  Those tests usually pass, and I don't think that better-behaved code that doesn't directly inspect DAGScheduler data structures should be seeing any problems, but I'll work on fixing DAGSchedulerSuite as either an addition to this PR or as a separate request.

UPDATE: Fixed the race that I introduced.  Created a JIRA issue (SPARK-965) for the one that was introduced with the switch to eventProcessorActor in the DAGScheduler.
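
To make the cancellation flow concrete, here is an illustrative sketch (hypothetical names, not the actual DAGScheduler fields) of the kind of bidirectional stageId <--> jobId bookkeeping that lets the DAGScheduler decide which stages are safe to cancel along with a job:

    import scala.collection.mutable

    class JobStageIndex {
      private val jobToStages = new mutable.HashMap[Int, mutable.HashSet[Int]]
      private val stageToJobs = new mutable.HashMap[Int, mutable.HashSet[Int]]

      def register(jobId: Int, stageId: Int) {
        jobToStages.getOrElseUpdate(jobId, new mutable.HashSet[Int]) += stageId
        stageToJobs.getOrElseUpdate(stageId, new mutable.HashSet[Int]) += jobId
      }

      // Stages safe to cancel with this job: those no other job depends on.
      def stagesToCancel(jobId: Int): Set[Int] =
        jobToStages.getOrElse(jobId, mutable.HashSet.empty[Int]).toSet
          .filter(sid => stageToJobs(sid).forall(_ == jobId))
    }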


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/e0392343
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/e0392343
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/e0392343

Branch: refs/heads/scala-2.10
Commit: e0392343a026d632bac0df0ad4f399fce742c151
Parents: bfa6860 403234d
Author: Matei Zaharia <ma...@eecs.berkeley.edu>
Authored: Fri Dec 6 11:49:59 2013 -0800
Committer: Matei Zaharia <ma...@eecs.berkeley.edu>
Committed: Fri Dec 6 11:49:59 2013 -0800

----------------------------------------------------------------------
 .../org/apache/spark/MapOutputTracker.scala     |   8 +-
 .../apache/spark/scheduler/DAGScheduler.scala   | 278 +++++++++++++++----
 .../spark/scheduler/DAGSchedulerEvent.scala     |   3 +-
 .../apache/spark/scheduler/SparkListener.scala  |   2 +-
 .../scheduler/cluster/ClusterScheduler.scala    |   4 +-
 .../cluster/ClusterTaskSetManager.scala         |   2 +-
 .../spark/scheduler/local/LocalScheduler.scala  |  27 +-
 .../org/apache/spark/JobCancellationSuite.scala |   4 +-
 .../spark/scheduler/DAGSchedulerSuite.scala     |  43 ++-
 9 files changed, 280 insertions(+), 91 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/e0392343/core/src/main/scala/org/apache/spark/scheduler/cluster/ClusterScheduler.scala
----------------------------------------------------------------------


[39/50] git commit: Merge pull request #195 from dhardy92/fix_DebScriptPackage

Posted by pw...@apache.org.
Merge pull request #195 from dhardy92/fix_DebScriptPackage

[Deb] fix package of Spark classes adding org.apache prefix in scripts embedded in .deb


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/d992ec6d
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/d992ec6d
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/d992ec6d

Branch: refs/heads/scala-2.10
Commit: d992ec6d9be30e624c8edb2a50c193ac3cfbab7a
Parents: 1f4a4bc 92c7cc0
Author: Patrick Wendell <pw...@gmail.com>
Authored: Sun Dec 8 20:49:20 2013 -0800
Committer: Patrick Wendell <pw...@gmail.com>
Committed: Sun Dec 8 20:49:20 2013 -0800

----------------------------------------------------------------------
 repl-bin/src/deb/bin/spark-executor | 2 +-
 repl-bin/src/deb/bin/spark-shell    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)
----------------------------------------------------------------------



[16/50] git commit: Fix long lines

Posted by pw...@apache.org.
Fix long lines


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/94b5881e
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/94b5881e
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/94b5881e

Branch: refs/heads/scala-2.10
Commit: 94b5881ee9d6c67f096ea9c2891a63978f256394
Parents: 5a864e3
Author: Aaron Davidson <aa...@databricks.com>
Authored: Fri Dec 6 00:22:00 2013 -0800
Committer: Aaron Davidson <aa...@databricks.com>
Committed: Fri Dec 6 00:22:00 2013 -0800

----------------------------------------------------------------------
 .../main/scala/org/apache/spark/deploy/client/Client.scala   | 6 ++++--
 .../main/scala/org/apache/spark/deploy/worker/Worker.scala   | 6 ++++--
 .../scala/org/apache/spark/storage/BlockObjectWriter.scala   | 2 +-
 core/src/main/scala/org/apache/spark/util/AkkaUtils.scala    | 8 +++-----
 4 files changed, 12 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/94b5881e/core/src/main/scala/org/apache/spark/deploy/client/Client.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/deploy/client/Client.scala b/core/src/main/scala/org/apache/spark/deploy/client/Client.scala
index f60e56d..d0d65ca 100644
--- a/core/src/main/scala/org/apache/spark/deploy/client/Client.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/client/Client.scala
@@ -104,8 +104,10 @@ private[spark] class Client(
       activeMasterUrl = url
       master = context.actorSelection(Master.toAkkaUrl(activeMasterUrl))
       masterAddress = activeMasterUrl match {
-        case Master.sparkUrlRegex(host, port) => Address("akka.tcp", Master.systemName, host, port.toInt)
-        case x => throw new SparkException("Invalid spark URL:"+x)
+        case Master.sparkUrlRegex(host, port) =>
+          Address("akka.tcp", Master.systemName, host, port.toInt)
+        case x =>
+          throw new SparkException("Invalid spark URL: " + x)
       }
     }
 

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/94b5881e/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala b/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala
index 808b54c..87531b6 100644
--- a/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala
@@ -138,8 +138,10 @@ private[spark] class Worker(
       activeMasterWebUiUrl = uiUrl
       master = context.actorSelection(Master.toAkkaUrl(activeMasterUrl))
       masterAddress = activeMasterUrl match {
-        case Master.sparkUrlRegex(_host, _port) => Address("akka.tcp", Master.systemName, _host, _port.toInt)
-        case x => throw new SparkException("Invalid spark URL:"+x)
+        case Master.sparkUrlRegex(_host, _port) =>
+          Address("akka.tcp", Master.systemName, _host, _port.toInt)
+        case x =>
+          throw new SparkException("Invalid spark URL: " + x)
       }
       connected = true
     }

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/94b5881e/core/src/main/scala/org/apache/spark/storage/BlockObjectWriter.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/storage/BlockObjectWriter.scala b/core/src/main/scala/org/apache/spark/storage/BlockObjectWriter.scala
index df33f6b..b4451fc 100644
--- a/core/src/main/scala/org/apache/spark/storage/BlockObjectWriter.scala
+++ b/core/src/main/scala/org/apache/spark/storage/BlockObjectWriter.scala
@@ -44,7 +44,7 @@ abstract class BlockObjectWriter(val blockId: BlockId) {
    * Flush the partial writes and commit them as a single atomic block. Return the
    * number of bytes written for this commit.
    */
-  def commit(): LongSpark
+  def commit(): Long
 
   /**
    * Reverts writes that haven't been flushed yet. Callers should invoke this function

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/94b5881e/core/src/main/scala/org/apache/spark/util/AkkaUtils.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/util/AkkaUtils.scala b/core/src/main/scala/org/apache/spark/util/AkkaUtils.scala
index 9f3f163..74133ce 100644
--- a/core/src/main/scala/org/apache/spark/util/AkkaUtils.scala
+++ b/core/src/main/scala/org/apache/spark/util/AkkaUtils.scala
@@ -17,11 +17,8 @@
 
 package org.apache.spark.util
 
-import akka.actor.{IndestructibleActorSystem, ActorSystem, ExtendedActorSystem}
+import akka.actor.{ActorSystem, ExtendedActorSystem, IndestructibleActorSystem}
 import com.typesafe.config.ConfigFactory
-import scala.concurrent.duration._
-import scala.concurrent.Await
-import akka.remote.RemoteActorRefProvider
 
 /**
  * Various utility classes for working with Akka.
@@ -47,7 +44,8 @@ private[spark] object AkkaUtils {
     val akkaTimeout = System.getProperty("spark.akka.timeout", "100").toInt
 
     val akkaFrameSize = System.getProperty("spark.akka.frameSize", "10").toInt
-    val lifecycleEvents = if (System.getProperty("spark.akka.logLifecycleEvents", "false").toBoolean) "on" else "off"
+    val lifecycleEvents =
+      if (System.getProperty("spark.akka.logLifecycleEvents", "false").toBoolean) "on" else "off"
 
     val akkaHeartBeatPauses = System.getProperty("spark.akka.heartbeat.pauses", "600").toInt
     val akkaFailureDetector =


[17/50] git commit: Merge pull request #220 from rxin/zippart

Posted by pw...@apache.org.
Merge pull request #220 from rxin/zippart

Memoize preferred locations in ZippedPartitionsBaseRDD

so preferred location computation doesn't lead to exponential explosion.

This was a problem in GraphX where we have a whole chain of RDDs that are ZippedPartitionsRDD's, and the preferred locations were taking an eternity to compute.

(cherry picked from commit e36fe55a031d2c01c9d7c5d85965951c681a0c74)
Signed-off-by: Reynold Xin <rx...@apache.org>
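
A minimal sketch of the memoization idea (a hypothetical class, not the actual ZippedPartitionsBaseRDD code): caching the computed locations turns the recursive walk, which revisits shared ancestors repeatedly, into a once-per-node computation.

    // Each node caches its preferred locations with a lazy val, so a deep
    // chain of zipped nodes evaluates each node once instead of re-walking
    // the whole lineage on every call.
    class ZippedLike(parents: Seq[ZippedLike], ownLocs: Seq[String]) {
      lazy val preferredLocations: Seq[String] =
        (ownLocs ++ parents.flatMap(_.preferredLocations)).distinct
    }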


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/87676a6a
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/87676a6a
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/87676a6a

Branch: refs/heads/scala-2.10
Commit: 87676a6af2c8fc33c5b5d4e7eb45e3e8558f3c33
Parents: 0780498 9cf7f31
Author: Matei Zaharia <ma...@eecs.berkeley.edu>
Authored: Fri Dec 6 11:01:42 2013 -0800
Committer: Matei Zaharia <ma...@eecs.berkeley.edu>
Committed: Fri Dec 6 11:01:42 2013 -0800

----------------------------------------------------------------------
 .../apache/spark/rdd/ZippedPartitionsRDD.scala  | 27 ++++++++------------
 1 file changed, 11 insertions(+), 16 deletions(-)
----------------------------------------------------------------------



[08/50] git commit: Merge pull request #228 from pwendell/master

Posted by pw...@apache.org.
Merge pull request #228 from pwendell/master

Document missing configs and set shuffle consolidation to false.


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/5d460253
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/5d460253
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/5d460253

Branch: refs/heads/scala-2.10
Commit: 5d460253d6080d871cb71efb112ea17be0873771
Parents: 72b6961 75d161b
Author: Patrick Wendell <pw...@gmail.com>
Authored: Thu Dec 5 12:31:24 2013 -0800
Committer: Patrick Wendell <pw...@gmail.com>
Committed: Thu Dec 5 12:31:24 2013 -0800

----------------------------------------------------------------------
 .../spark/storage/ShuffleBlockManager.scala     |  2 +-
 .../spark/storage/DiskBlockManagerSuite.scala   | 14 ++++++--
 docs/configuration.md                           | 37 +++++++++++++++++++-
 3 files changed, 49 insertions(+), 4 deletions(-)
----------------------------------------------------------------------



[11/50] git commit: Left-over akka -> akka.tcp changes

Posted by pw...@apache.org.
Left-over akka -> akka.tcp changes


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/4e704800
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/4e704800
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/4e704800

Branch: refs/heads/scala-2.10
Commit: 4e70480038e9654426876e8e6b2fc356b7f0c8ca
Parents: 5618af6
Author: Prashant Sharma <pr...@imaginea.com>
Authored: Fri Dec 6 12:25:32 2013 +0530
Committer: Prashant Sharma <pr...@imaginea.com>
Committed: Fri Dec 6 12:29:53 2013 +0530

----------------------------------------------------------------------
 .../org/apache/spark/scheduler/cluster/SimrSchedulerBackend.scala  | 2 +-
 .../main/scala/org/apache/spark/deploy/yarn/WorkerLauncher.scala   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/4e704800/core/src/main/scala/org/apache/spark/scheduler/cluster/SimrSchedulerBackend.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/SimrSchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/SimrSchedulerBackend.scala
index e000531..e8fecec 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/cluster/SimrSchedulerBackend.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/SimrSchedulerBackend.scala
@@ -36,7 +36,7 @@ private[spark] class SimrSchedulerBackend(
   override def start() {
     super.start()
 
-    val driverUrl = "akka://spark@%s:%s/user/%s".format(
+    val driverUrl = "akka.tcp://spark@%s:%s/user/%s".format(
       System.getProperty("spark.driver.host"), System.getProperty("spark.driver.port"),
       CoarseGrainedSchedulerBackend.ACTOR_NAME)
 

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/4e704800/yarn/src/main/scala/org/apache/spark/deploy/yarn/WorkerLauncher.scala
----------------------------------------------------------------------
diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/WorkerLauncher.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/WorkerLauncher.scala
index 421a83c..b67e068 100644
--- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/WorkerLauncher.scala
+++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/WorkerLauncher.scala
@@ -168,7 +168,7 @@ class WorkerLauncher(args: ApplicationMasterArguments, conf: Configuration) exte
     System.setProperty("spark.driver.host", driverHost)
     System.setProperty("spark.driver.port", driverPort.toString)
 
-    val driverUrl = "akka://spark@%s:%s/user/%s".format(
+    val driverUrl = "akka.tcp://spark@%s:%s/user/%s".format(
       driverHost, driverPort.toString, CoarseGrainedSchedulerBackend.ACTOR_NAME)
 
     actor = actorSystem.actorOf(Props(new MonitorActor(driverUrl)), name = "YarnAM")
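
The URL scheme is the visible part of the Akka 2.2 remoting change: remote actor paths now carry the transport in the protocol, akka.tcp:// rather than akka://. A quick sketch with placeholder host/port and a hypothetical actor name:

    val driverHost = "localhost"              // placeholder
    val driverPort = 7077                     // placeholder
    val actorName  = "CoarseGrainedScheduler" // hypothetical name
    val driverUrl = "akka.tcp://spark@%s:%s/user/%s".format(
      driverHost, driverPort.toString, actorName)
    // => akka.tcp://spark@localhost:7077/user/CoarseGrainedScheduler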


[25/50] git commit: Adding disclaimer for shuffle file consolidation

Posted by pw...@apache.org.
Adding disclaimer for shuffle file consolidation


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/b9451acd
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/b9451acd
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/b9451acd

Branch: refs/heads/scala-2.10
Commit: b9451acdf4c73ab4b0b9d9827ab1ccedbcfb144f
Parents: 241336a
Author: Patrick Wendell <pw...@gmail.com>
Authored: Fri Dec 6 19:25:28 2013 -0800
Committer: Patrick Wendell <pw...@gmail.com>
Committed: Fri Dec 6 19:25:28 2013 -0800

----------------------------------------------------------------------
 docs/configuration.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/b9451acd/docs/configuration.md
----------------------------------------------------------------------
diff --git a/docs/configuration.md b/docs/configuration.md
index 22abe1c..65b41b4 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -331,7 +331,7 @@ Apart from these, the following properties are also available, and may be useful
   <td>spark.shuffle.consolidateFiles</td>
   <td>false</td>
   <td>
-    If set to "true", consolidates intermediate files created during a shuffle. Creating fewer files can improve filesystem performance if you run shuffles with large numbers of reduce tasks.
+    If set to "true", consolidates intermediate files created during a shuffle. Creating fewer files can improve filesystem performance for shuffles with large numbers of reduce tasks. It is reccomended to set this to "true" when using ext4 or xfs filesystems. On ext3, this option might degrade performance on machines with many (>8) cores due to filesystem limitations.
   </td>
 </tr>
 <tr>
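
For reference, in Spark 0.8.x this option is a plain JVM system property set before the SparkContext is created; a minimal sketch (master and app name are placeholders):

    System.setProperty("spark.shuffle.consolidateFiles", "true")
    val sc = new org.apache.spark.SparkContext("local", "consolidation-demo")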


[22/50] git commit: more docs

Posted by pw...@apache.org.
more docs


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/e2c2914f
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/e2c2914f
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/e2c2914f

Branch: refs/heads/scala-2.10
Commit: e2c2914faaf3d8ab849ad0477152f64df7adf4c3
Parents: f2fb4b4
Author: Ali Ghodsi <al...@cs.berkeley.edu>
Authored: Fri Dec 6 16:54:06 2013 -0800
Committer: Ali Ghodsi <al...@cs.berkeley.edu>
Committed: Fri Dec 6 16:54:06 2013 -0800

----------------------------------------------------------------------
 docs/cluster-overview.md | 2 +-
 docs/index.md            | 4 +++-
 docs/running-on-yarn.md  | 2 +-
 3 files changed, 5 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/e2c2914f/docs/cluster-overview.md
----------------------------------------------------------------------
diff --git a/docs/cluster-overview.md b/docs/cluster-overview.md
index 5927f73..e167032 100644
--- a/docs/cluster-overview.md
+++ b/docs/cluster-overview.md
@@ -45,7 +45,7 @@ The system currently supports three cluster managers:
   easy to set up a cluster.
 * [Apache Mesos](running-on-mesos.html) -- a general cluster manager that can also run Hadoop MapReduce
   and service applications.
-* [Hadoop YARN](running-on-yarn.html) -- the resource manager in Hadoop 2.0.
+* [Hadoop YARN](running-on-yarn.html) -- the resource manager in Hadoop 2.
 
 In addition, Spark's [EC2 launch scripts](ec2-scripts.html) make it easy to launch a standalone
 cluster on Amazon EC2.

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/e2c2914f/docs/index.md
----------------------------------------------------------------------
diff --git a/docs/index.md b/docs/index.md
index 56e1142..bbb2733 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -63,7 +63,9 @@ In addition, if you wish to run Spark on [YARN](running-on-yarn.md), set
 
     SPARK_HADOOP_VERSION=2.0.5-alpha SPARK_YARN=true sbt/sbt assembly
 
-(Note that on Windows, you need to set the environment variables on separate lines, e.g., `set SPARK_HADOOP_VERSION=1.2.1`.)
+Note that on Windows, you need to set the environment variables on separate lines, e.g., `set SPARK_HADOOP_VERSION=1.2.1`.
+
+For this version of Spark (0.8.1) Hadoop 2.2.x (or newer) users will have to build Spark and publish it locally. See [Launching Spark on YARN](running-on-yarn.md). This is needed because Hadoop 2.2 has non backwards compatible API changes.
 
 # Where to Go from Here
 

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/e2c2914f/docs/running-on-yarn.md
----------------------------------------------------------------------
diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md
index 3ec656c..ae65127 100644
--- a/docs/running-on-yarn.md
+++ b/docs/running-on-yarn.md
@@ -114,7 +114,7 @@ For example:
 
 # Building Spark for Hadoop/YARN 2.2.x
 
-Hadoop 2.2.x users must build Spark and publish it locally. The SBT build process handles Hadoop 2.2.x as a special case. This version of Hadoop has new YARN API changes and depends on a Protobuf version (2.5) that is not compatible with the Akka version (2.0.5) that Spark uses. Therefore, if the Hadoop version (e.g. set through ```SPARK_HADOOP_VERSION```) starts with 2.2.0 or higher then the build process will depend on Akka artifacts distributed by the Spark project compatible with Protobuf 2.5. Furthermore, the build process then uses the directory ```new-yarn``` (stead of ```yarn```), which supports the new YARN API. The build process should seamlessly work out of the box. 
+Hadoop 2.2.x users must build Spark and publish it locally. The SBT build process handles Hadoop 2.2.x as a special case. This version of Hadoop has new YARN API changes and depends on a Protobuf version (2.5) that is not compatible with the Akka version (2.0.5) that Spark uses. Therefore, if the Hadoop version (e.g. set through ```SPARK_HADOOP_VERSION```) starts with 2.2.0 or higher then the build process will depend on Akka artifacts distributed by the Spark project compatible with Protobuf 2.5. Furthermore, the build process then uses the directory ```new-yarn``` (instead of ```yarn```), which supports the new YARN API. The build process should seamlessly work out of the box. 
 
 See [Building Spark with Maven](building-with-maven.md) for instructions on how to build Spark using the Maven process.
 


[35/50] git commit: Various broken links in documentation

Posted by pw...@apache.org.
Various broken links in documentation


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/41c60b33
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/41c60b33
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/41c60b33

Branch: refs/heads/scala-2.10
Commit: 41c60b337abc4ddd92fe5d4b9337156f3bf8b089
Parents: 6494d62
Author: Patrick Wendell <pw...@gmail.com>
Authored: Sat Dec 7 22:20:14 2013 -0800
Committer: Patrick Wendell <pw...@gmail.com>
Committed: Sat Dec 7 22:31:44 2013 -0800

----------------------------------------------------------------------
 docs/bagel-programming-guide.md          | 2 +-
 docs/hadoop-third-party-distributions.md | 2 +-
 docs/index.md                            | 2 +-
 docs/job-scheduling.md                   | 2 +-
 docs/running-on-yarn.md                  | 4 ++--
 docs/streaming-programming-guide.md      | 8 ++++----
 6 files changed, 10 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/41c60b33/docs/bagel-programming-guide.md
----------------------------------------------------------------------
diff --git a/docs/bagel-programming-guide.md b/docs/bagel-programming-guide.md
index 140190a..de001e6 100644
--- a/docs/bagel-programming-guide.md
+++ b/docs/bagel-programming-guide.md
@@ -106,7 +106,7 @@ _Example_
 
 ## Operations
 
-Here are the actions and types in the Bagel API. See [Bagel.scala](https://github.com/apache/incubator-spark/blob/master/bagel/src/main/scala/spark/bagel/Bagel.scala) for details.
+Here are the actions and types in the Bagel API. See [Bagel.scala](https://github.com/apache/incubator-spark/blob/master/bagel/src/main/scala/org/apache/spark/bagel/Bagel.scala) for details.
 
 ### Actions
 

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/41c60b33/docs/hadoop-third-party-distributions.md
----------------------------------------------------------------------
diff --git a/docs/hadoop-third-party-distributions.md b/docs/hadoop-third-party-distributions.md
index b33af2c..92d2c95 100644
--- a/docs/hadoop-third-party-distributions.md
+++ b/docs/hadoop-third-party-distributions.md
@@ -10,7 +10,7 @@ with these distributions:
 # Compile-time Hadoop Version
 
 When compiling Spark, you'll need to 
-[set the SPARK_HADOOP_VERSION flag](http://localhost:4000/index.html#a-note-about-hadoop-versions):
+[set the SPARK_HADOOP_VERSION flag](index.html#a-note-about-hadoop-versions):
 
     SPARK_HADOOP_VERSION=1.0.4 sbt/sbt assembly
 

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/41c60b33/docs/index.md
----------------------------------------------------------------------
diff --git a/docs/index.md b/docs/index.md
index 45616f7..d3ac696 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -58,7 +58,7 @@ By default, Spark links to Hadoop 1.0.4. You can change this by setting the
 
     SPARK_HADOOP_VERSION=2.2.0 sbt/sbt assembly
 
-In addition, if you wish to run Spark on [YARN](running-on-yarn.md), set
+In addition, if you wish to run Spark on [YARN](running-on-yarn.html), set
 `SPARK_YARN` to `true`:
 
     SPARK_HADOOP_VERSION=2.0.5-alpha SPARK_YARN=true sbt/sbt assembly

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/41c60b33/docs/job-scheduling.md
----------------------------------------------------------------------
diff --git a/docs/job-scheduling.md b/docs/job-scheduling.md
index d304c54..dbcb9ae 100644
--- a/docs/job-scheduling.md
+++ b/docs/job-scheduling.md
@@ -91,7 +91,7 @@ The fair scheduler also supports grouping jobs into _pools_, and setting differe
 (e.g. weight) for each pool. This can be useful to create a "high-priority" pool for more important jobs,
 for example, or to group the jobs of each user together and give _users_ equal shares regardless of how
 many concurrent jobs they have instead of giving _jobs_ equal shares. This approach is modeled after the
-[Hadoop Fair Scheduler](http://hadoop.apache.org/docs/stable/fair_scheduler.html).
+[Hadoop Fair Scheduler](http://hadoop.apache.org/docs/current/hadoop-yarn/hadoop-yarn-site/FairScheduler.html).
 
 Without any intervention, newly submitted jobs go into a _default pool_, but jobs' pools can be set by
 adding the `spark.scheduler.pool` "local property" to the SparkContext in the thread that's submitting them.
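
A short sketch of that local-property mechanism (the pool name is a placeholder; the pools themselves are configured separately):

    // Jobs submitted from this thread go to the named pool.
    sc.setLocalProperty("spark.scheduler.pool", "high-priority")
    // ... run jobs ...
    sc.setLocalProperty("spark.scheduler.pool", null)  // back to the default pool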

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/41c60b33/docs/running-on-yarn.md
----------------------------------------------------------------------
diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md
index ae65127..9e4c4e1 100644
--- a/docs/running-on-yarn.md
+++ b/docs/running-on-yarn.md
@@ -116,7 +116,7 @@ For example:
 
 Hadoop 2.2.x users must build Spark and publish it locally. The SBT build process handles Hadoop 2.2.x as a special case. This version of Hadoop has new YARN API changes and depends on a Protobuf version (2.5) that is not compatible with the Akka version (2.0.5) that Spark uses. Therefore, if the Hadoop version (e.g. set through ```SPARK_HADOOP_VERSION```) starts with 2.2.0 or higher then the build process will depend on Akka artifacts distributed by the Spark project compatible with Protobuf 2.5. Furthermore, the build process then uses the directory ```new-yarn``` (instead of ```yarn```), which supports the new YARN API. The build process should seamlessly work out of the box. 
 
-See [Building Spark with Maven](building-with-maven.md) for instructions on how to build Spark using the Maven process.
+See [Building Spark with Maven](building-with-maven.html) for instructions on how to build Spark using the Maven process.
 
 # Important Notes
 
@@ -124,4 +124,4 @@ See [Building Spark with Maven](building-with-maven.md) for instructions on how
 - The local directories used for spark will be the local directories configured for YARN (Hadoop Yarn config yarn.nodemanager.local-dirs). If the user specifies spark.local.dir, it will be ignored.
 - The --files and --archives options support specifying file names with the # similar to Hadoop. For example you can specify: --files localtest.txt#appSees.txt and this will upload the file you have locally named localtest.txt into HDFS but this will be linked to by the name appSees.txt and your application should use the name as appSees.txt to reference it when running on YARN.
 - The --addJars option allows the SparkContext.addJar function to work if you are using it with local files. It does not need to be used if you are using it with HDFS, HTTP, HTTPS, or FTP files.
-- YARN 2.2.x users cannot simply depend on the Spark packages without building Spark, as the published Spark artifacts are compiled to work with the pre 2.2 API. Those users must build Spark and publish it locally.  
\ No newline at end of file
+- YARN 2.2.x users cannot simply depend on the Spark packages without building Spark, as the published Spark artifacts are compiled to work with the pre 2.2 API. Those users must build Spark and publish it locally.  

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/41c60b33/docs/streaming-programming-guide.md
----------------------------------------------------------------------
diff --git a/docs/streaming-programming-guide.md b/docs/streaming-programming-guide.md
index 851e30f..82f42e0 100644
--- a/docs/streaming-programming-guide.md
+++ b/docs/streaming-programming-guide.md
@@ -214,7 +214,7 @@ ssc.stop()
 {% endhighlight %}
 
 # Example
-A simple example to start off is the [NetworkWordCount](https://github.com/apache/incubator-spark/tree/master/examples/src/main/scala/spark/streaming/examples/NetworkWordCount.scala). This example counts the words received from a network server every second. Given below is the relevant sections of the source code. You can find the full source code in `<Spark repo>/streaming/src/main/scala/spark/streaming/examples/NetworkWordCount.scala` .
+A simple example to start off is the [NetworkWordCount](https://github.com/apache/incubator-spark/tree/master/examples/src/main/scala/org/apache/spark/streaming/examples/NetworkWordCount.scala). This example counts the words received from a network server every second. Given below is the relevant sections of the source code. You can find the full source code in `<Spark repo>/streaming/src/main/scala/org/apache/spark/streaming/examples/NetworkWordCount.scala` .
 
 {% highlight scala %}
 import org.apache.spark.streaming.{Seconds, StreamingContext}
@@ -283,7 +283,7 @@ Time: 1357008430000 ms
 </td>
 </table>
 
-You can find more examples in `<Spark repo>/streaming/src/main/scala/spark/streaming/examples/`. They can be run in the similar manner using `./run-example org.apache.spark.streaming.examples....` . Executing without any parameter would give the required parameter list. Further explanation to run them can be found in comments in the files.
+You can find more examples in `<Spark repo>/streaming/src/main/scala/org/apache/spark/streaming/examples/`. They can be run in the similar manner using `./run-example org.apache.spark.streaming.examples....` . Executing without any parameter would give the required parameter list. Further explanation to run them can be found in comments in the files.
 
 # DStream Persistence
 Similar to RDDs, DStreams also allow developers to persist the stream's data in memory. That is, using `persist()` method on a DStream would automatically persist every RDD of that DStream in memory. This is useful if the data in the DStream will be computed multiple times (e.g., multiple operations on the same data). For window-based operations like `reduceByWindow` and `reduceByKeyAndWindow` and state-based operations like `updateStateByKey`, this is implicitly true. Hence, DStreams generated by window-based operations are automatically persisted in memory, without the developer calling `persist()`.
@@ -483,7 +483,7 @@ Similar to [Spark's Java API](java-programming-guide.html), we also provide a Ja
 1. Functions for transformations must be implemented as subclasses of [Function](api/core/index.html#org.apache.spark.api.java.function.Function) and [Function2](api/core/index.html#org.apache.spark.api.java.function.Function2)
 1. Unlike the Scala API, the Java API handles DStreams for key-value pairs using a separate [JavaPairDStream](api/streaming/index.html#org.apache.spark.streaming.api.java.JavaPairDStream) class(similar to [JavaRDD and JavaPairRDD](java-programming-guide.html#rdd-classes). DStream functions like `map` and `filter` are implemented separately by JavaDStreams and JavaPairDStream to return DStreams of appropriate types.
 
-Spark's [Java Programming Guide](java-programming-guide.html) gives more ideas about using the Java API. To extends the ideas presented for the RDDs to DStreams, we present parts of the Java version of the same NetworkWordCount example presented above. The full source code is given at `<spark repo>/examples/src/main/java/spark/streaming/examples/JavaNetworkWordCount.java`
+Spark's [Java Programming Guide](java-programming-guide.html) gives more ideas about using the Java API. To extends the ideas presented for the RDDs to DStreams, we present parts of the Java version of the same NetworkWordCount example presented above. The full source code is given at `<spark repo>/examples/src/main/java/org/apache/spark/streaming/examples/JavaNetworkWordCount.java`
 
 The streaming context and the socket stream from input source is started by using a `JavaStreamingContext`, that has the same parameters and provides the same input streams as its Scala counterpart.
 
@@ -527,5 +527,5 @@ JavaPairDStream<String, Integer> wordCounts = words.map(
 # Where to Go from Here
 
 * API docs - [Scala](api/streaming/index.html#org.apache.spark.streaming.package) and [Java](api/streaming/index.html#org.apache.spark.streaming.api.java.package)
-* More examples - [Scala](https://github.com/apache/incubator-spark/tree/master/examples/src/main/scala/spark/streaming/examples) and [Java](https://github.com/apache/incubator-spark/tree/master/examples/src/main/java/spark/streaming/examples)
+* More examples - [Scala](https://github.com/apache/incubator-spark/tree/master/examples/src/main/scala/org/apache/spark/streaming/examples) and [Java](https://github.com/apache/incubator-spark/tree/master/examples/src/main/java/org/apache/spark/streaming/examples)
 * [Paper describing Spark Streaming](http://www.eecs.berkeley.edu/Pubs/TechRpts/2012/EECS-2012-259.pdf)
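
A condensed sketch of the NetworkWordCount pattern the guide references (0.8.x-era API; master, host, and port are placeholders):

    import org.apache.spark.streaming.{Seconds, StreamingContext}
    import org.apache.spark.streaming.StreamingContext._  // pair-DStream ops

    object NetworkWordCountSketch {
      def main(args: Array[String]) {
        // 1-second batches; reads lines from a socket server on port 9999.
        val ssc = new StreamingContext("local[2]", "NetworkWordCount", Seconds(1))
        val lines = ssc.socketTextStream("localhost", 9999)
        val wordCounts = lines.flatMap(_.split(" ")).map(w => (w, 1)).reduceByKey(_ + _)
        wordCounts.print()
        ssc.start()
      }
    }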


[33/50] git commit: Merge pull request #239 from aarondav/nit

Posted by pw...@apache.org.
Merge pull request #239 from aarondav/nit

Correct spellling error in configuration.md


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/f466f79b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/f466f79b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/f466f79b

Branch: refs/heads/scala-2.10
Commit: f466f79b88cc0766d87de4ea5bdb27d826be4487
Parents: 10c3c0c cb6ac8a
Author: Patrick Wendell <pw...@gmail.com>
Authored: Sat Dec 7 11:51:52 2013 -0800
Committer: Patrick Wendell <pw...@gmail.com>
Committed: Sat Dec 7 11:51:52 2013 -0800

----------------------------------------------------------------------
 docs/configuration.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------



[41/50] git commit: License headers

Posted by pw...@apache.org.
License headers


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/5b74609d
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/5b74609d
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/5b74609d

Branch: refs/heads/scala-2.10
Commit: 5b74609d97cce9c350e959547e7fc524182a457c
Parents: 0428145
Author: Patrick Wendell <pw...@gmail.com>
Authored: Mon Dec 9 16:40:30 2013 -0800
Committer: Patrick Wendell <pw...@gmail.com>
Committed: Mon Dec 9 16:41:01 2013 -0800

----------------------------------------------------------------------
 .../apache/spark/storage/StoragePerfTester.scala   | 17 +++++++++++++++++
 .../spark/deploy/worker/ExecutorRunnerTest.scala   | 17 +++++++++++++++++
 .../spark/storage/DiskBlockManagerSuite.scala      | 17 +++++++++++++++++
 .../spark/util/collection/OpenHashMapSuite.scala   | 17 +++++++++++++++++
 .../spark/util/collection/OpenHashSetSuite.scala   | 17 +++++++++++++++++
 .../collection/PrimitiveKeyOpenHashMapSuite.scala  | 17 +++++++++++++++++
 python/test_support/userlibrary.py                 | 17 +++++++++++++++++
 7 files changed, 119 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/5b74609d/core/src/main/scala/org/apache/spark/storage/StoragePerfTester.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/storage/StoragePerfTester.scala b/core/src/main/scala/org/apache/spark/storage/StoragePerfTester.scala
index 1e4db4f..d52b3d8 100644
--- a/core/src/main/scala/org/apache/spark/storage/StoragePerfTester.scala
+++ b/core/src/main/scala/org/apache/spark/storage/StoragePerfTester.scala
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
 package org.apache.spark.storage
 
 import java.util.concurrent.atomic.AtomicLong

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/5b74609d/core/src/test/scala/org/apache/spark/deploy/worker/ExecutorRunnerTest.scala
----------------------------------------------------------------------
diff --git a/core/src/test/scala/org/apache/spark/deploy/worker/ExecutorRunnerTest.scala b/core/src/test/scala/org/apache/spark/deploy/worker/ExecutorRunnerTest.scala
index 8f09541..4cb4ddc 100644
--- a/core/src/test/scala/org/apache/spark/deploy/worker/ExecutorRunnerTest.scala
+++ b/core/src/test/scala/org/apache/spark/deploy/worker/ExecutorRunnerTest.scala
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
 package org.apache.spark.deploy.worker
 
 import java.io.File

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/5b74609d/core/src/test/scala/org/apache/spark/storage/DiskBlockManagerSuite.scala
----------------------------------------------------------------------
diff --git a/core/src/test/scala/org/apache/spark/storage/DiskBlockManagerSuite.scala b/core/src/test/scala/org/apache/spark/storage/DiskBlockManagerSuite.scala
index ef4c4c0..070982e 100644
--- a/core/src/test/scala/org/apache/spark/storage/DiskBlockManagerSuite.scala
+++ b/core/src/test/scala/org/apache/spark/storage/DiskBlockManagerSuite.scala
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
 package org.apache.spark.storage
 
 import java.io.{FileWriter, File}

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/5b74609d/core/src/test/scala/org/apache/spark/util/collection/OpenHashMapSuite.scala
----------------------------------------------------------------------
diff --git a/core/src/test/scala/org/apache/spark/util/collection/OpenHashMapSuite.scala b/core/src/test/scala/org/apache/spark/util/collection/OpenHashMapSuite.scala
index 63e874f..e9b62ea 100644
--- a/core/src/test/scala/org/apache/spark/util/collection/OpenHashMapSuite.scala
+++ b/core/src/test/scala/org/apache/spark/util/collection/OpenHashMapSuite.scala
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
 package org.apache.spark.util.collection
 
 import scala.collection.mutable.HashSet

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/5b74609d/core/src/test/scala/org/apache/spark/util/collection/OpenHashSetSuite.scala
----------------------------------------------------------------------
diff --git a/core/src/test/scala/org/apache/spark/util/collection/OpenHashSetSuite.scala b/core/src/test/scala/org/apache/spark/util/collection/OpenHashSetSuite.scala
index 4768a1e..1b24f8f 100644
--- a/core/src/test/scala/org/apache/spark/util/collection/OpenHashSetSuite.scala
+++ b/core/src/test/scala/org/apache/spark/util/collection/OpenHashSetSuite.scala
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
 package org.apache.spark.util.collection
 
 import org.scalatest.FunSuite

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/5b74609d/core/src/test/scala/org/apache/spark/util/collection/PrimitiveKeyOpenHashMapSuite.scala
----------------------------------------------------------------------
diff --git a/core/src/test/scala/org/apache/spark/util/collection/PrimitiveKeyOpenHashMapSuite.scala b/core/src/test/scala/org/apache/spark/util/collection/PrimitiveKeyOpenHashMapSuite.scala
index 2220b4f..3b60dec 100644
--- a/core/src/test/scala/org/apache/spark/util/collection/PrimitiveKeyOpenHashMapSuite.scala
+++ b/core/src/test/scala/org/apache/spark/util/collection/PrimitiveKeyOpenHashMapSuite.scala
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
 package org.apache.spark.util.collection
 
 import scala.collection.mutable.HashSet

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/5b74609d/python/test_support/userlibrary.py
----------------------------------------------------------------------
diff --git a/python/test_support/userlibrary.py b/python/test_support/userlibrary.py
index 5bb6f50..8e4a629 100755
--- a/python/test_support/userlibrary.py
+++ b/python/test_support/userlibrary.py
@@ -1,3 +1,20 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
 """
 Used to test shipping of code depenencies with SparkContext.addPyFile().
 """


[42/50] git commit: Merge pull request #246 from pwendell/master

Posted by pw...@apache.org.
Merge pull request #246 from pwendell/master

Add missing license headers

I found this when doing further audits on the 0.8.1 release candidate.


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/6169fe14
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/6169fe14
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/6169fe14

Branch: refs/heads/scala-2.10
Commit: 6169fe14a140146602fb07cfcd13eee6efad98f9
Parents: d992ec6 5b74609
Author: Patrick Wendell <pw...@gmail.com>
Authored: Mon Dec 9 16:51:36 2013 -0800
Committer: Patrick Wendell <pw...@gmail.com>
Committed: Mon Dec 9 16:51:36 2013 -0800

----------------------------------------------------------------------
 .../apache/spark/storage/StoragePerfTester.scala   | 17 +++++++++++++++++
 .../spark/deploy/worker/ExecutorRunnerTest.scala   | 17 +++++++++++++++++
 .../spark/storage/DiskBlockManagerSuite.scala      | 17 +++++++++++++++++
 .../spark/util/collection/OpenHashMapSuite.scala   | 17 +++++++++++++++++
 .../spark/util/collection/OpenHashSetSuite.scala   | 17 +++++++++++++++++
 .../collection/PrimitiveKeyOpenHashMapSuite.scala  | 17 +++++++++++++++++
 python/test_support/userlibrary.py                 | 17 +++++++++++++++++
 7 files changed, 119 insertions(+)
----------------------------------------------------------------------



[45/50] git commit: Merge pull request #250 from pwendell/master

Posted by pw...@apache.org.
Merge pull request #250 from pwendell/master

README incorrectly suggests the build sources spark-env.sh

This is misleading because the build doesn't source that file. IMO
it's better to force people to always specify build environment variables
on the command line, like we do in every example, so I'm
just removing this doc.


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/d2efe135
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/d2efe135
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/d2efe135

Branch: refs/heads/scala-2.10
Commit: d2efe13574090e93c600adeacc7f6356bc196e6c
Parents: 6169fe1 153cad1
Author: Patrick Wendell <pw...@gmail.com>
Authored: Tue Dec 10 13:01:26 2013 -0800
Committer: Patrick Wendell <pw...@gmail.com>
Committed: Tue Dec 10 13:01:26 2013 -0800

----------------------------------------------------------------------
 README.md | 3 ---
 1 file changed, 3 deletions(-)
----------------------------------------------------------------------



[31/50] git commit: Correct spelling error in configuration.md

Posted by pw...@apache.org.
Correct spelling error in configuration.md


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/cb6ac8aa
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/cb6ac8aa
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/cb6ac8aa

Branch: refs/heads/scala-2.10
Commit: cb6ac8aafb5491f137146db8e8937cbefc7ffdb1
Parents: 10c3c0c
Author: Aaron Davidson <aa...@databricks.com>
Authored: Sat Dec 7 01:39:52 2013 -0800
Committer: Aaron Davidson <aa...@databricks.com>
Committed: Sat Dec 7 01:40:01 2013 -0800

----------------------------------------------------------------------
 docs/configuration.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/cb6ac8aa/docs/configuration.md
----------------------------------------------------------------------
diff --git a/docs/configuration.md b/docs/configuration.md
index e86b9ea..62c4457 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -331,7 +331,7 @@ Apart from these, the following properties are also available, and may be useful
   <td>spark.shuffle.consolidateFiles</td>
   <td>false</td>
   <td>
-    If set to "true", consolidates intermediate files created during a shuffle. Creating fewer files can improve filesystem performance for shuffles with large numbers of reduce tasks. It is reccomended to set this to "true" when using ext4 or xfs filesystems. On ext3, this option might degrade performance on machines with many (>8) cores due to filesystem limitations.
+    If set to "true", consolidates intermediate files created during a shuffle. Creating fewer files can improve filesystem performance for shuffles with large numbers of reduce tasks. It is recommended to set this to "true" when using ext4 or xfs filesystems. On ext3, this option might degrade performance on machines with many (>8) cores due to filesystem limitations.
   </td>
 </tr>
 <tr>
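For context, a minimal sketch (not from this commit) of how a user of
this Spark version would enable the setting, which is read as a Java
system property; the master URL and app name are placeholders.

    import org.apache.spark.SparkContext

    // Enable shuffle file consolidation (recommended on ext4/xfs).
    // Must be set before the SparkContext is constructed.
    System.setProperty("spark.shuffle.consolidateFiles", "true")
    val sc = new SparkContext("local", "ConsolidateFilesExample")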


[50/50] git commit: Merge pull request #254 from ScrapCodes/scala-2.10

Posted by pw...@apache.org.
Merge pull request #254 from ScrapCodes/scala-2.10

Scala 2.10 migration

This PR migrates Spark to Scala 2.10.

Summary of changes apart from the Scala 2.10 migration:
(These have no implications for users.)
1. Migrated Akka to 2.2.3.

Remote death watch is not used, since it has a bug where it keeps trying to send messages to a dead node indefinitely.

An indestructible actor system is used, which tolerates errors only on executors.

(These might be useful for users.)
4. New configuration settings introduced:

System.getProperty("spark.akka.heartbeat.pauses", "600")
System.getProperty("spark.akka.failure-detector.threshold", "300.0")
System.getProperty("spark.akka.heartbeat.interval", "1000")

The defaults for these are fairly large so that they simply disable the failure detector that comes with Akka. The reason for doing so is that we have our own failure-detector-like mechanism in place, so Akka's is just overhead on top of that, and it leads to a lot of false positives. With these properties it is still possible to enable it. A good use case for enabling it is when someone wants Spark to be sensitive (in a controllable manner, of course) to GC pauses or network lags and to quickly evict executors that experienced them. More information is included in configuration.md
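A hedged sketch (not part of the original message): since this version
of Spark reads these settings as Java system properties, re-enabling
Akka's failure detector amounts to setting smaller values before the
SparkContext is created. The values below are the previous defaults
from configuration.md, shown purely as an example.

    // Re-enable Akka's failure detector by lowering the large defaults.
    // These calls must run before the SparkContext is created.
    System.setProperty("spark.akka.heartbeat.pauses", "60")
    System.setProperty("spark.akka.failure-detector.threshold", "12.0")
    System.setProperty("spark.akka.heartbeat.interval", "5")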

Once SPARK-544 is merged, I would like to deprecate at least these Akka properties, and maybe others too.

This PR is a duplicate of #221 (where all the discussion happened); that one pointed at master, while this one points at the scala-2.10 branch.


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/2e89398e
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/2e89398e
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/2e89398e

Branch: refs/heads/scala-2.10
Commit: 2e89398e44b3103598a4fae0b09368ed9fbda9c2
Parents: 5429d62 d3090b7
Author: Patrick Wendell <pw...@gmail.com>
Authored: Wed Dec 11 23:10:53 2013 -0800
Committer: Patrick Wendell <pw...@gmail.com>
Committed: Wed Dec 11 23:10:53 2013 -0800

----------------------------------------------------------------------
 README.md                                       |   10 +-
 assembly/pom.xml                                |   16 +-
 bagel/pom.xml                                   |   12 +-
 bin/compute-classpath.sh                        |   22 +-
 bin/slaves.sh                                   |   19 +-
 bin/spark-daemon.sh                             |   21 +-
 bin/spark-daemons.sh                            |    2 +-
 bin/stop-slaves.sh                              |    2 -
 conf/metrics.properties.template                |    8 +
 core/pom.xml                                    |   38 +-
 .../spark/network/netty/FileClientHandler.java  |    3 +-
 .../spark/network/netty/FileServerHandler.java  |   23 +-
 .../spark/network/netty/PathResolver.java       |   11 +-
 .../hadoop/mapred/SparkHadoopMapRedUtil.scala   |   17 +-
 .../mapreduce/SparkHadoopMapReduceUtil.scala    |   33 +-
 .../scala/org/apache/spark/Aggregator.scala     |   49 +-
 .../apache/spark/BlockStoreShuffleFetcher.scala |   23 +-
 .../scala/org/apache/spark/CacheManager.scala   |   12 +-
 .../scala/org/apache/spark/FutureAction.scala   |  250 +++++
 .../apache/spark/InterruptibleIterator.scala    |   30 +
 .../org/apache/spark/MapOutputTracker.scala     |  194 ++--
 .../scala/org/apache/spark/Partitioner.scala    |    6 +-
 .../scala/org/apache/spark/ShuffleFetcher.scala |    5 +-
 .../scala/org/apache/spark/SparkContext.scala   |  398 ++++---
 .../main/scala/org/apache/spark/SparkEnv.scala  |   33 +-
 .../org/apache/spark/SparkHadoopWriter.scala    |   21 +-
 .../scala/org/apache/spark/TaskContext.scala    |   21 +-
 .../scala/org/apache/spark/TaskEndReason.scala  |    2 +
 .../apache/spark/api/java/JavaDoubleRDD.scala   |   64 ++
 .../org/apache/spark/api/java/JavaPairRDD.scala |   35 +
 .../org/apache/spark/api/java/JavaRDD.scala     |   19 +
 .../java/function/DoubleFlatMapFunction.java    |   10 +-
 .../spark/api/java/function/DoubleFunction.java |    3 +-
 .../api/java/function/FlatMapFunction.scala     |    3 -
 .../api/java/function/FlatMapFunction2.scala    |    3 -
 .../spark/api/java/function/Function.java       |    4 +-
 .../spark/api/java/function/Function2.java      |    2 -
 .../spark/api/java/function/Function3.java      |   36 +
 .../api/java/function/PairFlatMapFunction.java  |    2 -
 .../spark/api/java/function/PairFunction.java   |    5 +-
 .../api/java/function/WrappedFunction3.scala    |   34 +
 .../org/apache/spark/api/python/PythonRDD.scala |  151 +--
 .../spark/broadcast/BitTorrentBroadcast.scala   | 1058 ------------------
 .../apache/spark/broadcast/HttpBroadcast.scala  |   23 +-
 .../apache/spark/broadcast/MultiTracker.scala   |  410 -------
 .../org/apache/spark/broadcast/SourceInfo.scala |   54 -
 .../spark/broadcast/TorrentBroadcast.scala      |  247 ++++
 .../apache/spark/broadcast/TreeBroadcast.scala  |  603 ----------
 .../org/apache/spark/deploy/DeployMessage.scala |   29 +-
 .../spark/deploy/ExecutorDescription.scala      |   34 +
 .../spark/deploy/FaultToleranceTest.scala       |  420 +++++++
 .../org/apache/spark/deploy/JsonProtocol.scala  |    3 +-
 .../apache/spark/deploy/LocalSparkCluster.scala |   13 +-
 .../apache/spark/deploy/SparkHadoopUtil.scala   |   60 +-
 .../org/apache/spark/deploy/client/Client.scala |   98 +-
 .../spark/deploy/client/ClientListener.scala    |    4 +
 .../apache/spark/deploy/client/TestClient.scala |    7 +-
 .../spark/deploy/master/ApplicationInfo.scala   |   53 +-
 .../spark/deploy/master/ApplicationState.scala  |    5 +-
 .../spark/deploy/master/ExecutorInfo.scala      |    7 +-
 .../master/FileSystemPersistenceEngine.scala    |   90 ++
 .../deploy/master/LeaderElectionAgent.scala     |   45 +
 .../org/apache/spark/deploy/master/Master.scala |  260 +++--
 .../spark/deploy/master/MasterMessages.scala    |   46 +
 .../spark/deploy/master/PersistenceEngine.scala |   53 +
 .../spark/deploy/master/RecoveryState.scala     |   24 +
 .../deploy/master/SparkZooKeeperSession.scala   |  203 ++++
 .../apache/spark/deploy/master/WorkerInfo.scala |   42 +-
 .../spark/deploy/master/WorkerState.scala       |    2 +-
 .../master/ZooKeeperLeaderElectionAgent.scala   |  136 +++
 .../master/ZooKeeperPersistenceEngine.scala     |   85 ++
 .../spark/deploy/worker/ExecutorRunner.scala    |   15 +-
 .../org/apache/spark/deploy/worker/Worker.scala |  193 +++-
 .../spark/deploy/worker/WorkerArguments.scala   |    8 +-
 .../spark/deploy/worker/ui/WorkerWebUI.scala    |   16 +-
 .../executor/CoarseGrainedExecutorBackend.scala |  121 ++
 .../org/apache/spark/executor/Executor.scala    |  173 ++-
 .../apache/spark/executor/ExecutorSource.scala  |    2 -
 .../spark/executor/MesosExecutorBackend.scala   |   18 +-
 .../executor/StandaloneExecutorBackend.scala    |  119 --
 .../org/apache/spark/executor/TaskMetrics.scala |   26 +-
 .../spark/metrics/sink/GraphiteSink.scala       |   82 ++
 .../spark/network/ConnectionManager.scala       |    3 +-
 .../apache/spark/network/netty/FileHeader.scala |   22 +-
 .../spark/network/netty/ShuffleCopier.scala     |   29 +-
 .../spark/network/netty/ShuffleSender.scala     |    9 +-
 .../main/scala/org/apache/spark/package.scala   |    2 +
 .../org/apache/spark/rdd/AsyncRDDActions.scala  |  123 ++
 .../scala/org/apache/spark/rdd/BlockRDD.scala   |    9 +-
 .../org/apache/spark/rdd/CartesianRDD.scala     |    2 +-
 .../org/apache/spark/rdd/CheckpointRDD.scala    |   20 +-
 .../org/apache/spark/rdd/CoGroupedRDD.scala     |   26 +-
 .../apache/spark/rdd/DoubleRDDFunctions.scala   |  127 +++
 .../scala/org/apache/spark/rdd/HadoopRDD.scala  |  125 +--
 .../scala/org/apache/spark/rdd/JdbcRDD.scala    |    1 +
 .../org/apache/spark/rdd/MapPartitionsRDD.scala |   11 +-
 .../spark/rdd/MapPartitionsWithIndexRDD.scala   |   42 -
 .../scala/org/apache/spark/rdd/MappedRDD.scala  |    3 +-
 .../org/apache/spark/rdd/NewHadoopRDD.scala     |   79 +-
 .../apache/spark/rdd/OrderedRDDFunctions.scala  |    3 +-
 .../org/apache/spark/rdd/PairRDDFunctions.scala |   16 +-
 .../spark/rdd/ParallelCollectionRDD.scala       |    5 +-
 .../apache/spark/rdd/PartitionPruningRDD.scala  |    8 +-
 .../main/scala/org/apache/spark/rdd/RDD.scala   |  135 ++-
 .../org/apache/spark/rdd/ShuffledRDD.scala      |    5 +-
 .../org/apache/spark/rdd/SubtractedRDD.scala    |    2 +-
 .../apache/spark/rdd/ZippedPartitionsRDD.scala  |   48 +-
 .../apache/spark/scheduler/DAGScheduler.scala   |  589 +++++++---
 .../spark/scheduler/DAGSchedulerEvent.scala     |   30 +-
 .../spark/scheduler/DAGSchedulerSource.scala    |    2 +-
 .../spark/scheduler/InputFormatInfo.scala       |    7 +-
 .../org/apache/spark/scheduler/JobLogger.scala  |  676 ++++++-----
 .../org/apache/spark/scheduler/JobWaiter.scala  |   63 +-
 .../scala/org/apache/spark/scheduler/Pool.scala |    5 +-
 .../org/apache/spark/scheduler/ResultTask.scala |   48 +-
 .../spark/scheduler/SchedulableBuilder.scala    |    3 +
 .../apache/spark/scheduler/ShuffleMapTask.scala |   77 +-
 .../apache/spark/scheduler/SparkListener.scala  |   23 +-
 .../spark/scheduler/SparkListenerBus.scala      |    2 +
 .../org/apache/spark/scheduler/Stage.scala      |    6 +-
 .../org/apache/spark/scheduler/StageInfo.scala  |   14 +-
 .../scala/org/apache/spark/scheduler/Task.scala |   63 +-
 .../org/apache/spark/scheduler/TaskInfo.scala   |   22 +
 .../org/apache/spark/scheduler/TaskResult.scala |    3 +-
 .../apache/spark/scheduler/TaskScheduler.scala  |   10 +-
 .../spark/scheduler/TaskSchedulerListener.scala |   44 -
 .../org/apache/spark/scheduler/TaskSet.scala    |    4 +
 .../scheduler/cluster/ClusterScheduler.scala    |   96 +-
 .../cluster/ClusterTaskSetManager.scala         |  116 +-
 .../cluster/CoarseGrainedClusterMessage.scala   |   69 ++
 .../cluster/CoarseGrainedSchedulerBackend.scala |  230 ++++
 .../scheduler/cluster/SchedulerBackend.scala    |    6 +-
 .../cluster/SimrSchedulerBackend.scala          |   67 ++
 .../cluster/SparkDeploySchedulerBackend.scala   |   20 +-
 .../cluster/StandaloneClusterMessage.scala      |   63 --
 .../cluster/StandaloneSchedulerBackend.scala    |  200 ----
 .../scheduler/cluster/TaskResultGetter.scala    |   26 +-
 .../mesos/CoarseMesosSchedulerBackend.scala     |   16 +-
 .../spark/scheduler/local/LocalScheduler.scala  |  201 ++--
 .../scheduler/local/LocalTaskSetManager.scala   |   24 +-
 .../spark/serializer/KryoSerializer.scala       |   52 +-
 .../apache/spark/storage/BlockException.scala   |    2 +-
 .../spark/storage/BlockFetcherIterator.scala    |   24 +-
 .../org/apache/spark/storage/BlockId.scala      |  103 ++
 .../org/apache/spark/storage/BlockInfo.scala    |   81 ++
 .../org/apache/spark/storage/BlockManager.scala |  632 ++++-------
 .../spark/storage/BlockManagerMaster.scala      |   29 +-
 .../spark/storage/BlockManagerMasterActor.scala |   25 +-
 .../spark/storage/BlockManagerMessages.scala    |   16 +-
 .../spark/storage/BlockManagerSlaveActor.scala  |    1 +
 .../spark/storage/BlockManagerWorker.scala      |    4 +-
 .../org/apache/spark/storage/BlockMessage.scala |   38 +-
 .../spark/storage/BlockMessageArray.scala       |    7 +-
 .../spark/storage/BlockObjectWriter.scala       |  144 ++-
 .../org/apache/spark/storage/BlockStore.scala   |   14 +-
 .../apache/spark/storage/DiskBlockManager.scala |  151 +++
 .../org/apache/spark/storage/DiskStore.scala    |  280 +----
 .../org/apache/spark/storage/FileSegment.scala  |   28 +
 .../org/apache/spark/storage/MemoryStore.scala  |   34 +-
 .../spark/storage/ShuffleBlockManager.scala     |  200 +++-
 .../org/apache/spark/storage/StorageLevel.scala |    2 +-
 .../spark/storage/StoragePerfTester.scala       |  103 ++
 .../org/apache/spark/storage/StorageUtils.scala |   47 +-
 .../apache/spark/storage/ThreadingTest.scala    |    8 +-
 .../apache/spark/ui/UIWorkloadGenerator.scala   |    2 +-
 .../org/apache/spark/ui/exec/ExecutorsUI.scala  |   23 +-
 .../org/apache/spark/ui/jobs/IndexPage.scala    |    2 +-
 .../spark/ui/jobs/JobProgressListener.scala     |  105 +-
 .../org/apache/spark/ui/jobs/PoolTable.scala    |    8 +-
 .../org/apache/spark/ui/jobs/StagePage.scala    |   83 +-
 .../org/apache/spark/ui/jobs/StageTable.scala   |   33 +-
 .../org/apache/spark/ui/storage/RDDPage.scala   |   23 +-
 .../scala/org/apache/spark/util/AkkaUtils.scala |   67 +-
 .../org/apache/spark/util/AppendOnlyMap.scala   |  237 ++++
 .../spark/util/IndestructibleActorSystem.scala  |   68 ++
 .../org/apache/spark/util/MetadataCleaner.scala |   36 +-
 .../scala/org/apache/spark/util/Utils.scala     |   72 +-
 .../org/apache/spark/util/XORShiftRandom.scala  |   94 ++
 .../apache/spark/util/collection/BitSet.scala   |  103 ++
 .../spark/util/collection/OpenHashMap.scala     |  153 +++
 .../spark/util/collection/OpenHashSet.scala     |  279 +++++
 .../collection/PrimitiveKeyOpenHashMap.scala    |  128 +++
 .../spark/util/collection/PrimitiveVector.scala |   69 ++
 .../scala/org/apache/spark/BroadcastSuite.scala |   52 +-
 .../org/apache/spark/CacheManagerSuite.scala    |   21 +-
 .../org/apache/spark/CheckpointSuite.scala      |    8 +-
 .../org/apache/spark/DistributedSuite.scala     |   16 +-
 .../org/apache/spark/FileServerSuite.scala      |   16 +
 .../scala/org/apache/spark/JavaAPISuite.java    |   37 +-
 .../org/apache/spark/JobCancellationSuite.scala |  209 ++++
 .../org/apache/spark/LocalSparkContext.scala    |    2 +-
 .../apache/spark/MapOutputTrackerSuite.scala    |   26 +-
 .../apache/spark/PartitionPruningRDDSuite.scala |   45 -
 .../org/apache/spark/PartitioningSuite.scala    |   10 +-
 .../SparkContextSchedulerCreationSuite.scala    |  140 +++
 .../apache/spark/deploy/JsonProtocolSuite.scala |    7 +-
 .../deploy/worker/ExecutorRunnerTest.scala      |   36 +
 .../apache/spark/rdd/AsyncRDDActionsSuite.scala |  202 ++++
 .../org/apache/spark/rdd/DoubleRDDSuite.scala   |  271 +++++
 .../spark/rdd/PairRDDFunctionsSuite.scala       |    2 +-
 .../spark/rdd/PartitionPruningRDDSuite.scala    |   86 ++
 .../scala/org/apache/spark/rdd/RDDSuite.scala   |   20 +
 .../spark/scheduler/DAGSchedulerSuite.scala     |   80 +-
 .../apache/spark/scheduler/JobLoggerSuite.scala |   24 +-
 .../spark/scheduler/SparkListenerSuite.scala    |  136 ++-
 .../cluster/ClusterTaskSetManagerSuite.scala    |   49 +-
 .../spark/scheduler/cluster/FakeTask.scala      |    5 +-
 .../cluster/TaskResultGetterSuite.scala         |    3 +-
 .../scheduler/local/LocalSchedulerSuite.scala   |   28 +-
 .../org/apache/spark/storage/BlockIdSuite.scala |  114 ++
 .../spark/storage/BlockManagerSuite.scala       |  104 +-
 .../spark/storage/DiskBlockManagerSuite.scala   |  111 ++
 .../apache/spark/util/AppendOnlyMapSuite.scala  |  154 +++
 .../apache/spark/util/XORShiftRandomSuite.scala |   76 ++
 .../spark/util/collection/BitSetSuite.scala     |   73 ++
 .../util/collection/OpenHashMapSuite.scala      |  177 +++
 .../util/collection/OpenHashSetSuite.scala      |  180 +++
 .../PrimitiveKeyOpenHashMapSuite.scala          |  119 ++
 .../util/collection/PrimitiveVectorSuite.scala  |  117 ++
 docker/README.md                                |    5 +
 docker/build                                    |   22 +
 docker/spark-test/README.md                     |   11 +
 docker/spark-test/base/Dockerfile               |   38 +
 docker/spark-test/build                         |   22 +
 docker/spark-test/master/Dockerfile             |   21 +
 docker/spark-test/master/default_cmd            |   22 +
 docker/spark-test/worker/Dockerfile             |   22 +
 docker/spark-test/worker/default_cmd            |   22 +
 docs/_layouts/global.html                       |    8 +-
 docs/bagel-programming-guide.md                 |    2 +-
 docs/building-with-maven.md                     |    6 +
 docs/cluster-overview.md                        |   16 +-
 docs/configuration.md                           |   69 +-
 docs/ec2-scripts.md                             |    2 +-
 docs/hadoop-third-party-distributions.md        |    7 +-
 docs/index.md                                   |    8 +-
 docs/job-scheduling.md                          |    2 +-
 docs/monitoring.md                              |    1 +
 docs/python-programming-guide.md                |   11 +
 docs/running-on-yarn.md                         |   47 +-
 docs/scala-programming-guide.md                 |    6 +-
 docs/spark-standalone.md                        |   79 +-
 docs/streaming-programming-guide.md             |   17 +-
 docs/tuning.md                                  |    7 +-
 ec2/spark_ec2.py                                |   68 +-
 examples/pom.xml                                |   56 +-
 .../streaming/examples/JavaKafkaWordCount.java  |   98 ++
 .../apache/spark/examples/BroadcastTest.scala   |   21 +-
 .../org/apache/spark/examples/LocalALS.scala    |    2 +-
 .../spark/examples/MultiBroadcastTest.scala     |   15 +-
 .../org/apache/spark/examples/SparkHdfsLR.scala |    3 +-
 .../org/apache/spark/examples/SparkKMeans.scala |    2 -
 .../org/apache/spark/examples/SparkPi.scala     |    2 +-
 .../org/apache/spark/examples/SparkTC.scala     |    2 +-
 .../streaming/examples/ActorWordCount.scala     |    4 +-
 .../streaming/examples/KafkaWordCount.scala     |   28 +-
 .../streaming/examples/MQTTWordCount.scala      |  107 ++
 .../clickstream/PageViewGenerator.scala         |   13 +-
 mllib/pom.xml                                   |   12 +-
 .../apache/spark/mllib/clustering/KMeans.scala  |   11 +-
 new-yarn/pom.xml                                |  161 +++
 .../spark/deploy/yarn/ApplicationMaster.scala   |  446 ++++++++
 .../yarn/ApplicationMasterArguments.scala       |   94 ++
 .../org/apache/spark/deploy/yarn/Client.scala   |  519 +++++++++
 .../spark/deploy/yarn/ClientArguments.scala     |  148 +++
 .../yarn/ClientDistributedCacheManager.scala    |  228 ++++
 .../spark/deploy/yarn/WorkerLauncher.scala      |  223 ++++
 .../spark/deploy/yarn/WorkerRunnable.scala      |  209 ++++
 .../deploy/yarn/YarnAllocationHandler.scala     |  687 ++++++++++++
 .../spark/deploy/yarn/YarnSparkHadoopUtil.scala |   43 +
 .../cluster/YarnClientClusterScheduler.scala    |   47 +
 .../cluster/YarnClientSchedulerBackend.scala    |  109 ++
 .../cluster/YarnClusterScheduler.scala          |   55 +
 .../ClientDistributedCacheManagerSuite.scala    |  220 ++++
 pom.xml                                         |  249 +++--
 project/SparkBuild.scala                        |   68 +-
 project/plugins.sbt                             |    2 +-
 python/epydoc.conf                              |    2 +-
 python/pyspark/accumulators.py                  |   19 +-
 python/pyspark/context.py                       |  117 +-
 python/pyspark/rdd.py                           |  102 +-
 python/pyspark/serializers.py                   |  301 ++++-
 python/pyspark/tests.py                         |   18 +-
 python/pyspark/worker.py                        |   44 +-
 python/run-tests                                |    1 +
 python/test_support/userlibrary.py              |   17 +
 repl-bin/pom.xml                                |    8 +-
 repl-bin/src/deb/bin/spark-executor             |    2 +-
 repl-bin/src/deb/bin/spark-shell                |    2 +-
 repl/pom.xml                                    |   16 +-
 .../org/apache/spark/repl/SparkILoop.scala      |   22 +-
 .../scala/org/apache/spark/repl/ReplSuite.scala |  173 +--
 spark-class                                     |   29 +-
 spark-class2.cmd                                |    7 +
 .../kafka/0.7.2-spark/kafka-0.7.2-spark.jar     |  Bin 1358063 -> 0 bytes
 .../kafka/0.7.2-spark/kafka-0.7.2-spark.jar.md5 |    1 -
 .../0.7.2-spark/kafka-0.7.2-spark.jar.sha1      |    1 -
 .../kafka/0.7.2-spark/kafka-0.7.2-spark.pom     |    9 -
 .../kafka/0.7.2-spark/kafka-0.7.2-spark.pom.md5 |    1 -
 .../0.7.2-spark/kafka-0.7.2-spark.pom.sha1      |    1 -
 .../apache/kafka/kafka/maven-metadata-local.xml |   12 -
 .../kafka/kafka/maven-metadata-local.xml.md5    |    1 -
 .../kafka/kafka/maven-metadata-local.xml.sha1   |    1 -
 streaming/pom.xml                               |   70 +-
 .../org/apache/spark/streaming/Checkpoint.scala |    8 +-
 .../org/apache/spark/streaming/DStream.scala    |   55 +-
 .../spark/streaming/NetworkInputTracker.scala   |   14 +-
 .../spark/streaming/PairDStreamFunctions.scala  |  154 ++-
 .../spark/streaming/StreamingContext.scala      |   52 +-
 .../spark/streaming/api/java/JavaDStream.scala  |    8 +-
 .../streaming/api/java/JavaDStreamLike.scala    |   97 +-
 .../streaming/api/java/JavaPairDStream.scala    |  186 ++-
 .../api/java/JavaStreamingContext.scala         |  113 +-
 .../streaming/dstream/CoGroupedDStream.scala    |   59 -
 .../streaming/dstream/FlumeInputDStream.scala   |    4 +-
 .../streaming/dstream/KafkaInputDStream.scala   |   62 +-
 .../streaming/dstream/MQTTInputDStream.scala    |  110 ++
 .../streaming/dstream/NetworkInputDStream.scala |   20 +-
 .../streaming/dstream/RawInputDStream.scala     |    4 +-
 .../streaming/dstream/TransformedDStream.scala  |   20 +-
 .../streaming/receivers/ActorReceiver.scala     |    4 +-
 .../streaming/receivers/ZeroMQReceiver.scala    |    4 +-
 .../apache/spark/streaming/JavaAPISuite.java    |  425 ++++++-
 .../apache/spark/streaming/JavaTestUtils.scala  |   36 +-
 .../spark/streaming/BasicOperationsSuite.scala  |  141 ++-
 .../spark/streaming/CheckpointSuite.scala       |    4 +-
 .../spark/streaming/InputStreamsSuite.scala     |   95 +-
 .../apache/spark/streaming/TestSuiteBase.scala  |   63 +-
 tools/pom.xml                                   |   12 +-
 .../tools/JavaAPICompletenessChecker.scala      |    4 +-
 yarn/pom.xml                                    |   58 +-
 .../spark/deploy/yarn/ApplicationMaster.scala   |  226 ++--
 .../org/apache/spark/deploy/yarn/Client.scala   |  385 +++++--
 .../spark/deploy/yarn/ClientArguments.scala     |   55 +-
 .../yarn/ClientDistributedCacheManager.scala    |  228 ++++
 .../spark/deploy/yarn/WorkerLauncher.scala      |  243 ++++
 .../spark/deploy/yarn/WorkerRunnable.scala      |  179 +--
 .../deploy/yarn/YarnAllocationHandler.scala     |  360 +++---
 .../spark/deploy/yarn/YarnSparkHadoopUtil.scala |    5 +-
 .../cluster/YarnClientClusterScheduler.scala    |   47 +
 .../cluster/YarnClientSchedulerBackend.scala    |  109 ++
 .../ClientDistributedCacheManagerSuite.scala    |  220 ++++
 342 files changed, 18856 insertions(+), 7000 deletions(-)
----------------------------------------------------------------------



[06/50] git commit: Merge pull request #199 from harveyfeng/yarn-2.2

Posted by pw...@apache.org.
Merge pull request #199 from harveyfeng/yarn-2.2

Hadoop 2.2 migration

Includes support for the YARN API stabilized in the Hadoop 2.2 release, and a few style patches.

Short description for each set of commits:

a98f5a0 - "Misc style changes in the 'yarn' package"
a67ebf4 - "A few more style fixes in the 'yarn' package"
Both of these make minor style changes to the existing YARN code, such as fixing lines over 100 characters.

ab8652f - "Add a 'new-yarn' directory ... "
Copies everything from `SPARK_HOME/yarn` to `SPARK_HOME/new-yarn`. No actual code changes here.

4f1c3fa - "Hadoop 2.2 YARN API migration ..."
API patches to code in the `SPARK_HOME/new-yarn` directory. There are a few more small style changes mixed in, too.
Based on @colorant's Hadoop 2.2 support for the scala-2.10 branch in #141.

a1a1c62 - "Add optional Hadoop 2.2 settings in sbt build ... "
If Spark should be built against Hadoop 2.2, then:
a) the `org.apache.spark.deploy.yarn` package will be compiled from the `new-yarn` directory.
b) Protobuf v2.5 will be used as a Spark dependency, since Hadoop 2.2 depends on it. Also, Spark will be built against a version of Akka v2.0.5 that's built against Protobuf 2.5, named `akka-2.0.5-protobuf-2.5`. The patched Akka is here: https://github.com/harveyfeng/akka/tree/2.0.5-protobuf-2.5, and was published to local Ivy during testing.

There's also a new boolean environment variable, `SPARK_IS_NEW_HADOOP`, that users can manually set if their `SPARK_HADOOP_VERSION` specification does not start with `2.2`, which is how the build file tries to detect a 2.2 version. Not sure if this is necessary or done in the best way, though...
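A hedged example of the resulting invocations (assembled from the docs
in this change set, not quoted from it); the second version string is a
placeholder for a 2.2-based version that the prefix check would miss.

    # Auto-detected: SPARK_HADOOP_VERSION starts with 2.2
    SPARK_HADOOP_VERSION=2.2.0 SPARK_YARN=true sbt/sbt assembly
    # Manual override for version strings the detection misses
    SPARK_IS_NEW_HADOOP=true SPARK_HADOOP_VERSION=<vendor-version> SPARK_YARN=true sbt/sbt assembly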


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/72b69615
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/72b69615
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/72b69615

Branch: refs/heads/scala-2.10
Commit: 72b696156c8662cae2cef4b943520b4be86148ea
Parents: 182f9ba 46b87b8
Author: Matei Zaharia <ma...@eecs.berkeley.edu>
Authored: Wed Dec 4 23:33:04 2013 -0800
Committer: Matei Zaharia <ma...@eecs.berkeley.edu>
Committed: Wed Dec 4 23:33:04 2013 -0800

----------------------------------------------------------------------
 core/pom.xml                                    |  10 +-
 .../scala/org/apache/spark/SparkContext.scala   |   2 +-
 new-yarn/pom.xml                                | 161 +++++
 .../spark/deploy/yarn/ApplicationMaster.scala   | 446 ++++++++++++
 .../yarn/ApplicationMasterArguments.scala       |  94 +++
 .../org/apache/spark/deploy/yarn/Client.scala   | 519 ++++++++++++++
 .../spark/deploy/yarn/ClientArguments.scala     | 148 ++++
 .../yarn/ClientDistributedCacheManager.scala    | 228 ++++++
 .../spark/deploy/yarn/WorkerLauncher.scala      | 223 ++++++
 .../spark/deploy/yarn/WorkerRunnable.scala      | 209 ++++++
 .../deploy/yarn/YarnAllocationHandler.scala     | 687 +++++++++++++++++++
 .../spark/deploy/yarn/YarnSparkHadoopUtil.scala |  43 ++
 .../cluster/YarnClientClusterScheduler.scala    |  47 ++
 .../cluster/YarnClientSchedulerBackend.scala    | 109 +++
 .../cluster/YarnClusterScheduler.scala          |  55 ++
 .../ClientDistributedCacheManagerSuite.scala    | 220 ++++++
 pom.xml                                         |  61 +-
 project/SparkBuild.scala                        |  34 +-
 streaming/pom.xml                               |   9 +-
 .../spark/deploy/yarn/ApplicationMaster.scala   | 172 ++---
 .../org/apache/spark/deploy/yarn/Client.scala   | 151 ++--
 .../spark/deploy/yarn/WorkerRunnable.scala      |  85 ++-
 .../deploy/yarn/YarnAllocationHandler.scala     | 346 ++++++----
 23 files changed, 3716 insertions(+), 343 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/72b69615/core/src/main/scala/org/apache/spark/SparkContext.scala
----------------------------------------------------------------------


[32/50] git commit: SPARK-917 Improve API links in nav bar

Posted by pw...@apache.org.
SPARK-917 Improve API links in nav bar


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/dd331a6b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/dd331a6b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/dd331a6b

Branch: refs/heads/scala-2.10
Commit: dd331a6b268993f869ccf9f3ea388b9f9596ef6b
Parents: 10c3c0c
Author: Patrick Wendell <pw...@gmail.com>
Authored: Sat Dec 7 11:49:49 2013 -0800
Committer: Patrick Wendell <pw...@gmail.com>
Committed: Sat Dec 7 11:49:49 2013 -0800

----------------------------------------------------------------------
 docs/_layouts/global.html | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/dd331a6b/docs/_layouts/global.html
----------------------------------------------------------------------
diff --git a/docs/_layouts/global.html b/docs/_layouts/global.html
index 0c1d657..ad7969d 100755
--- a/docs/_layouts/global.html
+++ b/docs/_layouts/global.html
@@ -74,12 +74,12 @@
                         <li class="dropdown">
                             <a href="#" class="dropdown-toggle" data-toggle="dropdown">API Docs<b class="caret"></b></a>
                             <ul class="dropdown-menu">
-                                <li><a href="api/core/index.html">Spark Core for Java/Scala</a></li>
+                                <li><a href="api/core/index.html#org.apache.spark.package">Spark Core for Java/Scala</a></li>
                                 <li><a href="api/pyspark/index.html">Spark Core for Python</a></li>
                                 <li class="divider"></li>
-                                <li><a href="api/streaming/index.html">Spark Streaming</a></li>
-                                <li><a href="api/mllib/index.html">MLlib (Machine Learning)</a></li>
-                                <li><a href="api/bagel/index.html">Bagel (Pregel on Spark)</a></li>
+                                <li><a href="api/streaming/index.html#org.apache.spark.streaming.package">Spark Streaming</a></li>
+                                <li><a href="api/mllib/index.html#org.apache.spark.mllib.package">MLlib (Machine Learning)</a></li>
+                                <li><a href="api/bagel/index.html#org.apache.spark.bagel.package">Bagel (Pregel on Spark)</a></li>
                             </ul>
                         </li>
 


[05/50] git commit: Small changes from Matei review

Posted by pw...@apache.org.
Small changes from Matei review


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/1450b8ef
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/1450b8ef
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/1450b8ef

Branch: refs/heads/scala-2.10
Commit: 1450b8ef87d65634800941bbaf259d9b4dd6cc3b
Parents: b1c6fa1
Author: Patrick Wendell <pw...@gmail.com>
Authored: Wed Dec 4 18:49:32 2013 -0800
Committer: Patrick Wendell <pw...@gmail.com>
Committed: Wed Dec 4 18:49:32 2013 -0800

----------------------------------------------------------------------
 docs/configuration.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/1450b8ef/docs/configuration.md
----------------------------------------------------------------------
diff --git a/docs/configuration.md b/docs/configuration.md
index 1a3eef3..22abe1c 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -331,7 +331,7 @@ Apart from these, the following properties are also available, and may be useful
   <td>spark.shuffle.consolidateFiles</td>
   <td>false</td>
   <td>
-    If set to "true", consolidates intermediate files created during a shuffle.
+    If set to "true", consolidates intermediate files created during a shuffle. Creating fewer files can improve filesystem performance if you run shuffles with large numbers of reduce tasks.
   </td>
 </tr>
 <tr>
@@ -346,7 +346,7 @@ Apart from these, the following properties are also available, and may be useful
   <td>spark.speculation.interval</td>
   <td>100</td>
   <td>
-    How often Spark will check for tasks to speculate, in seconds.
+    How often Spark will check for tasks to speculate, in milliseconds.
   </td>
 </tr>
 <tr>


[49/50] git commit: A few corrections to documentation.

Posted by pw...@apache.org.
A few corrections to documentation.


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/d3090b79
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/d3090b79
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/d3090b79

Branch: refs/heads/scala-2.10
Commit: d3090b79a53355828b5315bd5b05c4187468df02
Parents: f4c73df
Author: Prashant Sharma <pr...@imaginea.com>
Authored: Thu Dec 12 10:12:06 2013 +0530
Committer: Prashant Sharma <pr...@imaginea.com>
Committed: Thu Dec 12 10:12:06 2013 +0530

----------------------------------------------------------------------
 docs/configuration.md | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/d3090b79/docs/configuration.md
----------------------------------------------------------------------
diff --git a/docs/configuration.md b/docs/configuration.md
index 8cca77e..677d182 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -275,30 +275,30 @@ Apart from these, the following properties are also available, and may be useful
 </tr>
 <tr>
   <td>spark.akka.timeout</td>
-  <td>60</td>
+  <td>100</td>
   <td>
     Communication timeout between Spark nodes, in seconds.
   </td>
 </tr>
 <tr>
   <td>spark.akka.heartbeat.pauses</td>
-  <td>60</td>
+  <td>600</td>
   <td>
-     Acceptable heart beat pause in seconds for akka, tune this if you expect GC pauses or network delays (reconnections) etc.
+     This is set to a larger value to disable the failure detector that comes built in to Akka. It can be enabled again if you plan to use this feature (not recommended). This is the acceptable heartbeat pause in seconds for Akka and can be used to control sensitivity to GC pauses. Tune it in combination with `spark.akka.heartbeat.interval` and `spark.akka.failure-detector.threshold` if you need to.
   </td>
 </tr>
 <tr>
   <td>spark.akka.failure-detector.threshold</td>
-  <td>12.0</td>
+  <td>300.0</td>
   <td>
-     Tuning this is not recommended unless you are sure what you are doing. This maps to akka's `akka.remote.transport-failure-detector.threshold`. Usually having a larger value of `spark.akka.pauses` should suffice. 
+     This is set to a larger value to disable the failure detector that comes built in to Akka. It can be enabled again if you plan to use this feature (not recommended). This maps to Akka's `akka.remote.transport-failure-detector.threshold`. Tune it in combination with `spark.akka.heartbeat.pauses` and `spark.akka.heartbeat.interval` if you need to.
   </td>
 </tr>
 <tr>
   <td>spark.akka.heartbeat.interval</td>
-  <td>5</td>
+  <td>1000</td>
   <td>
-     A larger interval value in seconds reduces network overhead and a smaller value might be more informative for akka's failure detector. Tune this in combination of `spark.akka.heartbeat.pauses` and `spark.akka.failure-detector.threshold` if you need to.
+    This is set to a larger value to disable the failure detector that comes built in to Akka. It can be enabled again if you plan to use this feature (not recommended). A larger interval value in seconds reduces network overhead, while a smaller value (~1 s) might be more informative for Akka's failure detector. Tune it in combination with `spark.akka.heartbeat.pauses` and `spark.akka.failure-detector.threshold` if you need to. The one positive use case for the failure detector is that a sensitive detector can help evict rogue executors quickly. However, this is usually not needed, since GC pauses and network lags are expected in a real Spark cluster. Moreover, enabling it leads to a lot of heartbeat exchanges between nodes, flooding the network.
   </td>
 </tr>
 <tr>


[12/50] git commit: FutureAction result tests

Posted by pw...@apache.org.
FutureAction result tests


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/ee888f6b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/ee888f6b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/ee888f6b

Branch: refs/heads/scala-2.10
Commit: ee888f6b251c4f06f2edf15267d12e42e28fd22f
Parents: aebb123
Author: Mark Hamstra <ma...@gmail.com>
Authored: Thu Dec 5 21:53:40 2013 -0800
Committer: Mark Hamstra <ma...@gmail.com>
Committed: Thu Dec 5 23:01:18 2013 -0800

----------------------------------------------------------------------
 .../apache/spark/rdd/AsyncRDDActionsSuite.scala | 26 ++++++++++++++++++++
 1 file changed, 26 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/ee888f6b/core/src/test/scala/org/apache/spark/rdd/AsyncRDDActionsSuite.scala
----------------------------------------------------------------------
diff --git a/core/src/test/scala/org/apache/spark/rdd/AsyncRDDActionsSuite.scala b/core/src/test/scala/org/apache/spark/rdd/AsyncRDDActionsSuite.scala
index da032b1..0d4c10d 100644
--- a/core/src/test/scala/org/apache/spark/rdd/AsyncRDDActionsSuite.scala
+++ b/core/src/test/scala/org/apache/spark/rdd/AsyncRDDActionsSuite.scala
@@ -19,6 +19,8 @@ package org.apache.spark.rdd
 
 import java.util.concurrent.Semaphore
 
+import scala.concurrent.{Await, TimeoutException}
+import scala.concurrent.duration.Duration
 import scala.concurrent.ExecutionContext.Implicits.global
 
 import org.scalatest.{BeforeAndAfterAll, FunSuite}
@@ -173,4 +175,28 @@ class AsyncRDDActionsSuite extends FunSuite with BeforeAndAfterAll with Timeouts
       sem.acquire(2)
     }
   }
+
+  /**
+   * Awaiting FutureAction results
+   */
+  test("FutureAction result, infinite wait") {
+    val f = sc.parallelize(1 to 100, 4)
+              .countAsync()
+    assert(Await.result(f, Duration.Inf) === 100)
+  }
+
+  test("FutureAction result, finite wait") {
+    val f = sc.parallelize(1 to 100, 4)
+              .countAsync()
+    assert(Await.result(f, Duration(30, "seconds")) === 100)
+  }
+
+  test("FutureAction result, timeout") {
+    val f = sc.parallelize(1 to 100, 4)
+              .mapPartitions(itr => { Thread.sleep(20); itr })
+              .countAsync()
+    intercept[TimeoutException] {
+      Await.result(f, Duration(20, "milliseconds"))
+    }
+  }
 }


[21/50] git commit: Updated documentation about the YARN v2.2 build process

Posted by pw...@apache.org.
Updated documentation about the YARN v2.2 build process


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/f2fb4b42
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/f2fb4b42
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/f2fb4b42

Branch: refs/heads/scala-2.10
Commit: f2fb4b422863059476816df07ca7ea18f62e3a9d
Parents: 5d46025
Author: Ali Ghodsi <al...@cs.berkeley.edu>
Authored: Fri Dec 6 00:43:12 2013 -0800
Committer: Ali Ghodsi <al...@cs.berkeley.edu>
Committed: Fri Dec 6 16:31:26 2013 -0800

----------------------------------------------------------------------
 docs/building-with-maven.md | 4 ++++
 docs/index.md               | 2 +-
 docs/running-on-yarn.md     | 8 ++++++++
 3 files changed, 13 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/f2fb4b42/docs/building-with-maven.md
----------------------------------------------------------------------
diff --git a/docs/building-with-maven.md b/docs/building-with-maven.md
index 19c01e1..a508786 100644
--- a/docs/building-with-maven.md
+++ b/docs/building-with-maven.md
@@ -45,6 +45,10 @@ For Apache Hadoop 2.x, 0.23.x, Cloudera CDH MRv2, and other Hadoop versions with
     # Cloudera CDH 4.2.0 with MapReduce v2
     $ mvn -Phadoop2-yarn -Dhadoop.version=2.0.0-cdh4.2.0 -Dyarn.version=2.0.0-chd4.2.0 -DskipTests clean package
 
+Hadoop versions 2.2.x and newer can be built by enabling the ```new-yarn``` profile and setting ```yarn.version``` as follows:
+       mvn -Dyarn.version=2.2.0 -Dhadoop.version=2.2.0 -Pnew-yarn
+
+The build process handles Hadoop 2.2.x as a special case that uses the directory ```new-yarn```, which supports the new YARN API. Furthermore, for this version, the build depends on artifacts published by the spark-project to enable Akka 2.0.5 to work with protobuf 2.5. 
 
 ## Spark Tests in Maven ##
 

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/f2fb4b42/docs/index.md
----------------------------------------------------------------------
diff --git a/docs/index.md b/docs/index.md
index bd386a8..56e1142 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -56,7 +56,7 @@ Hadoop, you must build Spark against the same version that your cluster uses.
 By default, Spark links to Hadoop 1.0.4. You can change this by setting the
 `SPARK_HADOOP_VERSION` variable when compiling:
 
-    SPARK_HADOOP_VERSION=1.2.1 sbt/sbt assembly
+    SPARK_HADOOP_VERSION=2.2.0 sbt/sbt assembly
 
 In addition, if you wish to run Spark on [YARN](running-on-yarn.md), set
 `SPARK_YARN` to `true`:

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/f2fb4b42/docs/running-on-yarn.md
----------------------------------------------------------------------
diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md
index 68fd6c2..3ec656c 100644
--- a/docs/running-on-yarn.md
+++ b/docs/running-on-yarn.md
@@ -17,6 +17,7 @@ This can be built by setting the Hadoop version and `SPARK_YARN` environment var
 The assembled JAR will be something like this:
 `./assembly/target/scala-{{site.SCALA_VERSION}}/spark-assembly_{{site.SPARK_VERSION}}-hadoop2.0.5.jar`.
 
+The build process now also supports new YARN versions (2.2.x). See below.
 
 # Preparations
 
@@ -111,9 +112,16 @@ For example:
     SPARK_YARN_APP_JAR=examples/target/scala-{{site.SCALA_VERSION}}/spark-examples-assembly-{{site.SPARK_VERSION}}.jar \
     MASTER=yarn-client ./spark-shell
 
+# Building Spark for Hadoop/YARN 2.2.x
+
+Hadoop 2.2.x users must build Spark and publish it locally. The SBT build process handles Hadoop 2.2.x as a special case. This version of Hadoop has new YARN API changes and depends on a Protobuf version (2.5) that is not compatible with the Akka version (2.0.5) that Spark uses. Therefore, if the Hadoop version (e.g. set through ```SPARK_HADOOP_VERSION```) starts with 2.2.0 or higher, the build process will depend on Akka artifacts distributed by the Spark project that are compatible with Protobuf 2.5. Furthermore, the build process then uses the directory ```new-yarn``` (instead of ```yarn```), which supports the new YARN API. The build should work seamlessly out of the box.
+
+See [Building Spark with Maven](building-with-maven.md) for instructions on how to build Spark using the Maven process.
+
 # Important Notes
 
 - We do not request container resources based on the number of cores. Thus the number of cores given via command line arguments cannot be guaranteed.
 - The local directories used for spark will be the local directories configured for YARN (Hadoop Yarn config yarn.nodemanager.local-dirs). If the user specifies spark.local.dir, it will be ignored.
 - The --files and --archives options support specifying file names with the # similar to Hadoop. For example you can specify: --files localtest.txt#appSees.txt and this will upload the file you have locally named localtest.txt into HDFS but this will be linked to by the name appSees.txt and your application should use the name as appSees.txt to reference it when running on YARN.
 - The --addJars option allows the SparkContext.addJar function to work if you are using it with local files. It does not need to be used if you are using it with HDFS, HTTP, HTTPS, or FTP files.
+- YARN 2.2.x users cannot simply depend on the Spark packages without building Spark, as the published Spark artifacts are compiled to work with the pre-2.2 API. Those users must build Spark and publish it locally.
\ No newline at end of file


[02/50] git commit: Merge pull request #225 from ash211/patch-3

Posted by pw...@apache.org.
Merge pull request #225 from ash211/patch-3

Add missing space after "Serialized" in StorageLevel

The current code produces output like:

scala> res0.getStorageLevel.description
res2: String = Serialized1x Replicated


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/b9e7609f
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/b9e7609f
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/b9e7609f

Branch: refs/heads/scala-2.10
Commit: b9e7609f2c6f5cbafd0111a4feccad313327ac89
Parents: 055462c 2176116
Author: Reynold Xin <rx...@apache.org>
Authored: Wed Dec 4 14:42:09 2013 -0800
Committer: Reynold Xin <rx...@apache.org>
Committed: Wed Dec 4 14:42:09 2013 -0800

----------------------------------------------------------------------
 core/src/main/scala/org/apache/spark/storage/StorageLevel.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------



[04/50] git commit: Document missing configs and set shuffle consolidation to false.

Posted by pw...@apache.org.
Document missing configs and set shuffle consolidation to false.


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/b1c6fa15
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/b1c6fa15
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/b1c6fa15

Branch: refs/heads/scala-2.10
Commit: b1c6fa1584099b3a1e0615c100f10ea90b1ad2c9
Parents: 380b90b
Author: Patrick Wendell <pw...@gmail.com>
Authored: Wed Dec 4 18:39:34 2013 -0800
Committer: Patrick Wendell <pw...@gmail.com>
Committed: Wed Dec 4 18:39:34 2013 -0800

----------------------------------------------------------------------
 .../spark/storage/ShuffleBlockManager.scala     |  2 +-
 docs/configuration.md                           | 37 +++++++++++++++++++-
 2 files changed, 37 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/b1c6fa15/core/src/main/scala/org/apache/spark/storage/ShuffleBlockManager.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/storage/ShuffleBlockManager.scala b/core/src/main/scala/org/apache/spark/storage/ShuffleBlockManager.scala
index 2f1b049..e828e1d 100644
--- a/core/src/main/scala/org/apache/spark/storage/ShuffleBlockManager.scala
+++ b/core/src/main/scala/org/apache/spark/storage/ShuffleBlockManager.scala
@@ -62,7 +62,7 @@ class ShuffleBlockManager(blockManager: BlockManager) {
   // Turning off shuffle file consolidation causes all shuffle Blocks to get their own file.
   // TODO: Remove this once the shuffle file consolidation feature is stable.
   val consolidateShuffleFiles =
-    System.getProperty("spark.shuffle.consolidateFiles", "true").toBoolean
+    System.getProperty("spark.shuffle.consolidateFiles", "false").toBoolean
 
   private val bufferSize = System.getProperty("spark.shuffle.file.buffer.kb", "100").toInt * 1024
 

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/b1c6fa15/docs/configuration.md
----------------------------------------------------------------------
diff --git a/docs/configuration.md b/docs/configuration.md
index 97183ba..1a3eef3 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -327,7 +327,42 @@ Apart from these, the following properties are also available, and may be useful
     Too large a value decreases parallelism during broadcast (makes it slower); however, if it is too small, <code>BlockManager</code> might take a performance hit.
   </td>
 </tr>
-
+<tr>
+  <td>spark.shuffle.consolidateFiles</td>
+  <td>false</td>
+  <td>
+    If set to "true", consolidates intermediate files created during a shuffle.
+  </td>
+</tr>
+<tr>
+<tr>
+  <td>spark.speculation</td>
+  <td>false</td>
+  <td>
+    If set to "true", performs speculative execution of tasks. This means if one or more tasks are running slowly in a stage, they will be re-launched.
+  </td>
+</tr>
+<tr>
+  <td>spark.speculation.interval</td>
+  <td>100</td>
+  <td>
+    How often Spark will check for tasks to speculate, in seconds.
+  </td>
+</tr>
+<tr>
+  <td>spark.speculation.quantile</td>
+  <td>0.75</td>
+  <td>
+    Percentage of tasks which must be complete before speculation is enabled for a particular stage.
+  </td>
+</tr>
+<tr>
+  <td>spark.speculation.multiplier</td>
+  <td>1.5</td>
+  <td>
+    How many times slower a task is than the median to be considered for speculation.
+  </td>
+</tr>
 </table>
 
 # Environment Variables
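A minimal sketch (not part of the patch) of turning on the speculation
settings documented above; this version of Spark reads them as system
properties, and the values shown are the documented defaults. Note that
commit 1450b8ef elsewhere in this series corrects the interval's unit
to milliseconds.

    // Turn on speculative execution; set before creating the SparkContext.
    System.setProperty("spark.speculation", "true")
    System.setProperty("spark.speculation.interval", "100")   // check interval (milliseconds)
    System.setProperty("spark.speculation.quantile", "0.75")  // fraction of tasks finished before speculating
    System.setProperty("spark.speculation.multiplier", "1.5") // how much slower than the median counts as slow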


[13/50] git commit: Merge pull request #232 from markhamstra/FiniteWait

Posted by pw...@apache.org.
Merge pull request #232 from markhamstra/FiniteWait

jobWaiter.synchronized before jobWaiter.wait

...else ``IllegalMonitorStateException`` in ``SimpleFutureAction#ready``.


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/07804987
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/07804987
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/07804987

Branch: refs/heads/scala-2.10
Commit: 078049877e123fe7e4c4553e36055de572cab7c4
Parents: 5d46025 ee888f6
Author: Reynold Xin <rx...@apache.org>
Authored: Thu Dec 5 23:29:42 2013 -0800
Committer: Reynold Xin <rx...@apache.org>
Committed: Thu Dec 5 23:29:42 2013 -0800

----------------------------------------------------------------------
 .../scala/org/apache/spark/FutureAction.scala   |  2 +-
 .../org/apache/spark/scheduler/JobWaiter.scala  |  1 +
 .../apache/spark/rdd/AsyncRDDActionsSuite.scala | 26 ++++++++++++++++++++
 3 files changed, 28 insertions(+), 1 deletion(-)
----------------------------------------------------------------------



[36/50] git commit: Adding HDP 2.0 version

Posted by pw...@apache.org.
Adding HDP 2.0 version


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/b3e87c0f
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/b3e87c0f
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/b3e87c0f

Branch: refs/heads/scala-2.10
Commit: b3e87c0f5115f7f54726f9dab694879be4bc298f
Parents: 41c60b3
Author: Patrick Wendell <pw...@gmail.com>
Authored: Sat Dec 7 22:26:49 2013 -0800
Committer: Patrick Wendell <pw...@gmail.com>
Committed: Sat Dec 7 22:31:46 2013 -0800

----------------------------------------------------------------------
 docs/hadoop-third-party-distributions.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/b3e87c0f/docs/hadoop-third-party-distributions.md
----------------------------------------------------------------------
diff --git a/docs/hadoop-third-party-distributions.md b/docs/hadoop-third-party-distributions.md
index 92d2c95..268944f 100644
--- a/docs/hadoop-third-party-distributions.md
+++ b/docs/hadoop-third-party-distributions.md
@@ -39,7 +39,8 @@ the _exact_ Hadoop version you are running to avoid any compatibility errors.
         <tr><td>HDP 1.3</td><td>1.2.0</td></tr>
         <tr><td>HDP 1.2</td><td>1.1.2</td></tr>
         <tr><td>HDP 1.1</td><td>1.0.3</td></tr>
-        <tr><td>HDP 1.0</td><td>1.0.3</td></tr>
+	<tr><td>HDP 1.0</td><td>1.0.3</td></tr>
+        <tr><td>HDP 2.0</td><td>2.2.0</td></tr>
       </table>
     </td>
   </tr>


[03/50] git commit: Merge pull request #227 from pwendell/master

Posted by pw...@apache.org.
Merge pull request #227 from pwendell/master

Fix small bug in web UI and minor clean-up.

There was a bug where sorting order didn't work correctly for write time metrics.

I also cleaned up some earlier code that fixed the same issue for read and
write bytes.


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/182f9bae
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/182f9bae
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/182f9bae

Branch: refs/heads/scala-2.10
Commit: 182f9baeed8e4cc62ca14ae04413394477a7ccfb
Parents: b9e7609 380b90b
Author: Patrick Wendell <pw...@gmail.com>
Authored: Wed Dec 4 15:52:07 2013 -0800
Committer: Patrick Wendell <pw...@gmail.com>
Committed: Wed Dec 4 15:52:07 2013 -0800

----------------------------------------------------------------------
 .../org/apache/spark/ui/jobs/StagePage.scala    | 29 +++++++++-----------
 1 file changed, 13 insertions(+), 16 deletions(-)
----------------------------------------------------------------------