Posted to commits@spark.apache.org by pw...@apache.org on 2014/01/04 08:50:39 UTC

[01/10] git commit: Deleted py4j jar and added to assembly dependency

Updated Branches:
  refs/heads/master c4d6145f7 -> 10fe23bc3


Deleted py4j jar and added to assembly dependency


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/8821c3a5
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/8821c3a5
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/8821c3a5

Branch: refs/heads/master
Commit: 8821c3a5262d6893d2a1fd6ed86afd1213114b4d
Parents: c1d928a
Author: Prashant Sharma <sc...@gmail.com>
Authored: Thu Jan 2 13:07:52 2014 +0530
Committer: Prashant Sharma <sc...@gmail.com>
Committed: Thu Jan 2 13:09:46 2014 +0530

----------------------------------------------------------------------
 assembly/lib/PY4J_LICENSE.txt                   |  27 -------------------
 assembly/lib/PY4J_VERSION.txt                   |   1 -
 assembly/lib/net/sf/py4j/py4j/0.7/py4j-0.7.jar  | Bin 103286 -> 0 bytes
 assembly/lib/net/sf/py4j/py4j/0.7/py4j-0.7.pom  |   9 -------
 .../net/sf/py4j/py4j/maven-metadata-local.xml   |  12 ---------
 assembly/pom.xml                                |   2 +-
 project/SparkBuild.scala                        |   1 +
 7 files changed, 2 insertions(+), 50 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/8821c3a5/assembly/lib/PY4J_LICENSE.txt
----------------------------------------------------------------------
diff --git a/assembly/lib/PY4J_LICENSE.txt b/assembly/lib/PY4J_LICENSE.txt
deleted file mode 100644
index a70279c..0000000
--- a/assembly/lib/PY4J_LICENSE.txt
+++ /dev/null
@@ -1,27 +0,0 @@
-
-Copyright (c) 2009-2011, Barthelemy Dagenais All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-- Redistributions of source code must retain the above copyright notice, this
-list of conditions and the following disclaimer.
-
-- Redistributions in binary form must reproduce the above copyright notice,
-this list of conditions and the following disclaimer in the documentation
-and/or other materials provided with the distribution.
-
-- The name of the author may not be used to endorse or promote products
-derived from this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/8821c3a5/assembly/lib/PY4J_VERSION.txt
----------------------------------------------------------------------
diff --git a/assembly/lib/PY4J_VERSION.txt b/assembly/lib/PY4J_VERSION.txt
deleted file mode 100644
index 04a0cd5..0000000
--- a/assembly/lib/PY4J_VERSION.txt
+++ /dev/null
@@ -1 +0,0 @@
-b7924aabe9c5e63f0a4d8bbd17019534c7ec014e

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/8821c3a5/assembly/lib/net/sf/py4j/py4j/0.7/py4j-0.7.jar
----------------------------------------------------------------------
diff --git a/assembly/lib/net/sf/py4j/py4j/0.7/py4j-0.7.jar b/assembly/lib/net/sf/py4j/py4j/0.7/py4j-0.7.jar
deleted file mode 100644
index 73b7ddb..0000000
Binary files a/assembly/lib/net/sf/py4j/py4j/0.7/py4j-0.7.jar and /dev/null differ

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/8821c3a5/assembly/lib/net/sf/py4j/py4j/0.7/py4j-0.7.pom
----------------------------------------------------------------------
diff --git a/assembly/lib/net/sf/py4j/py4j/0.7/py4j-0.7.pom b/assembly/lib/net/sf/py4j/py4j/0.7/py4j-0.7.pom
deleted file mode 100644
index 1c730e1..0000000
--- a/assembly/lib/net/sf/py4j/py4j/0.7/py4j-0.7.pom
+++ /dev/null
@@ -1,9 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<project xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd" xmlns="http://maven.apache.org/POM/4.0.0"
-    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
-  <modelVersion>4.0.0</modelVersion>
-  <groupId>net.sf.py4j</groupId>
-  <artifactId>py4j</artifactId>
-  <version>0.7</version>
-  <description>POM was created from install:install-file</description>
-</project>

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/8821c3a5/assembly/lib/net/sf/py4j/py4j/maven-metadata-local.xml
----------------------------------------------------------------------
diff --git a/assembly/lib/net/sf/py4j/py4j/maven-metadata-local.xml b/assembly/lib/net/sf/py4j/py4j/maven-metadata-local.xml
deleted file mode 100644
index 6942ff4..0000000
--- a/assembly/lib/net/sf/py4j/py4j/maven-metadata-local.xml
+++ /dev/null
@@ -1,12 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<metadata>
-  <groupId>net.sf.py4j</groupId>
-  <artifactId>py4j</artifactId>
-  <versioning>
-    <release>0.7</release>
-    <versions>
-      <version>0.7</version>
-    </versions>
-    <lastUpdated>20130828020333</lastUpdated>
-  </versioning>
-</metadata>

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/8821c3a5/assembly/pom.xml
----------------------------------------------------------------------
diff --git a/assembly/pom.xml b/assembly/pom.xml
index fc2adc1..c19e4bc 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -67,7 +67,7 @@
     <dependency>
       <groupId>net.sf.py4j</groupId>
       <artifactId>py4j</artifactId>
-      <version>0.7</version>
+      <version>0.8.1</version>
     </dependency>
   </dependencies>
 

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/8821c3a5/project/SparkBuild.scala
----------------------------------------------------------------------
diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala
index b3b5fc7..2d2d635 100644
--- a/project/SparkBuild.scala
+++ b/project/SparkBuild.scala
@@ -338,6 +338,7 @@ object SparkBuild extends Build {
   )
 
   def assemblyProjSettings = sharedSettings ++ Seq(
+    libraryDependencies += "net.sf.py4j" % "py4j" % "0.8.1",
     name := "spark-assembly",
     assembleDeps in Compile <<= (packageProjects.map(packageBin in Compile in _) ++ Seq(packageDependency in Compile)).dependOn,
     jarName in assembly <<= version map { v => "spark-assembly-" + v + "-hadoop" + hadoopVersion + ".jar" },
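
A minimal sketch, not part of the commit: with the bundled jar removed from assembly/lib, py4j is resolved as a managed dependency like any other artifact. The coordinates below are taken from the diff above; the standalone build line is purely illustrative of how a downstream sbt project could declare the same library.

    // build.sbt (illustrative downstream declaration; coordinates from this patch)
    libraryDependencies += "net.sf.py4j" % "py4j" % "0.8.1"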


[07/10] git commit: Restored the previously removed test

Posted by pw...@apache.org.
Restored the previously removed test


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/bc311bb8
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/bc311bb8
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/bc311bb8

Branch: refs/heads/master
Commit: bc311bb826b5548b9c4c55320711f3b18dc19397
Parents: 94f2fff
Author: Prashant Sharma <pr...@imaginea.com>
Authored: Fri Jan 3 14:52:37 2014 +0530
Committer: Prashant Sharma <pr...@imaginea.com>
Committed: Fri Jan 3 14:52:37 2014 +0530

----------------------------------------------------------------------
 .../test/scala/org/apache/spark/FileServerSuite.scala  | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/bc311bb8/core/src/test/scala/org/apache/spark/FileServerSuite.scala
----------------------------------------------------------------------
diff --git a/core/src/test/scala/org/apache/spark/FileServerSuite.scala b/core/src/test/scala/org/apache/spark/FileServerSuite.scala
index a15c375..506f748 100644
--- a/core/src/test/scala/org/apache/spark/FileServerSuite.scala
+++ b/core/src/test/scala/org/apache/spark/FileServerSuite.scala
@@ -55,7 +55,7 @@ class FileServerSuite extends FunSuite with LocalSparkContext {
     in.close()
     jar.close()
     stream.close()
-    testJarFile = tmpFile2.getAbsolutePath
+    testJarFile = tmpFile2.toURI.toURL.toString
   }
 
   override def beforeEach() {
@@ -142,4 +142,15 @@ class FileServerSuite extends FunSuite with LocalSparkContext {
     }
   }
 
+  test ("Dynamically adding JARS on a standalone cluster using local: URL") {
+    sc = new SparkContext("local-cluster[1,1,512]", "test")
+    sc.addJar(testJarFile.replace("file", "local"))
+    val testData = Array((1,1))
+    sc.parallelize(testData).foreach { (x) =>
+      if (Thread.currentThread.getContextClassLoader.getResource("FileServerSuite2.txt") == null) {
+        throw new SparkException("jar not added")
+      }
+    }
+  }
+
 }
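
A minimal sketch (the jar path is hypothetical) of what the restored test exercises: rewriting the jar's "file:" URL to "local:" tells executors the jar is expected to already exist at that path on each node, instead of being shipped from the driver.

    // Sketch only; mirrors the replace("file", "local") in the test above.
    val jarUrl   = "file:/tmp/test.jar"              // typically served to executors by the driver
    val localUrl = jarUrl.replace("file", "local")   // "local:/tmp/test.jar", read from each node's disk
    sc.addJar(localUrl)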


[02/10] git commit: Removed sbt folder and changed docs accordingly

Posted by pw...@apache.org.
Removed sbt folder and changed docs accordingly


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/6be4c111
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/6be4c111
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/6be4c111

Branch: refs/heads/master
Commit: 6be4c1119493dea2af9734ad8b59fcded31f2676
Parents: 8821c3a
Author: Prashant Sharma <sc...@gmail.com>
Authored: Thu Jan 2 14:09:37 2014 +0530
Committer: Prashant Sharma <sc...@gmail.com>
Committed: Thu Jan 2 14:09:37 2014 +0530

----------------------------------------------------------------------
 README.md                                |  30 +++++++++++++-----
 docs/README.md                           |   4 +--
 docs/_plugins/copy_api_dirs.rb           |   4 +--
 docs/api.md                              |   2 +-
 docs/hadoop-third-party-distributions.md |   2 +-
 docs/index.md                            |   6 ++--
 docs/python-programming-guide.md         |   2 +-
 docs/quick-start.md                      |   2 +-
 docs/running-on-yarn.md                  |   6 ++--
 docs/scala-programming-guide.md          |   2 +-
 make-distribution.sh                     |  12 +++++--
 pyspark                                  |   2 +-
 run-example                              |   2 +-
 sbt/sbt                                  |  43 --------------------------
 sbt/sbt-launch-0.11.3-2.jar              | Bin 1096763 -> 0 bytes
 sbt/sbt.cmd                              |  25 ---------------
 spark-class                              |   2 +-
 17 files changed, 51 insertions(+), 95 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/6be4c111/README.md
----------------------------------------------------------------------
diff --git a/README.md b/README.md
index 1550a8b..22e7ab8 100644
--- a/README.md
+++ b/README.md
@@ -13,9 +13,9 @@ This README file only contains basic setup instructions.
 ## Building
 
 Spark requires Scala 2.10. The project is built using Simple Build Tool (SBT),
-which is packaged with it. To build Spark and its example programs, run:
+which can be obtained from [here](http://www.scala-sbt.org/release/docs/Getting-Started/Setup.html). To build Spark and its example programs, run:
 
-    sbt/sbt assembly
+    sbt assembly
 
 Once you've built Spark, the easiest way to start using it is the shell:
 
@@ -36,6 +36,22 @@ All of the Spark samples take a `<master>` parameter that is the cluster URL
 to connect to. This can be a mesos:// or spark:// URL, or "local" to run
 locally with one thread, or "local[N]" to run locally with N threads.
 
+## Running tests
+
+### With sbt. (you need sbt installed)
+Once you have built spark with `sbt assembly` mentioned in [Building](#Building) section. Test suits can be run as follows on *nix based systems using sbt.
+
+`SPARK_HOME=$(pwd) SPARK_TESTING=1 sbt test`
+ 
+TODO: figure out instructions for windows.
+ 
+### With maven.
+
+1. Build assembly by
+`mvn package -DskipTests`
+
+2. Run tests
+`mvn test`
 
 ## A Note About Hadoop Versions
 
@@ -49,22 +65,22 @@ For Apache Hadoop versions 1.x, Cloudera CDH MRv1, and other Hadoop
 versions without YARN, use:
 
     # Apache Hadoop 1.2.1
-    $ SPARK_HADOOP_VERSION=1.2.1 sbt/sbt assembly
+    $ SPARK_HADOOP_VERSION=1.2.1 sbt assembly
 
     # Cloudera CDH 4.2.0 with MapReduce v1
-    $ SPARK_HADOOP_VERSION=2.0.0-mr1-cdh4.2.0 sbt/sbt assembly
+    $ SPARK_HADOOP_VERSION=2.0.0-mr1-cdh4.2.0 sbt assembly
 
 For Apache Hadoop 2.2.X, 2.1.X, 2.0.X, 0.23.x, Cloudera CDH MRv2, and other Hadoop versions
 with YARN, also set `SPARK_YARN=true`:
 
     # Apache Hadoop 2.0.5-alpha
-    $ SPARK_HADOOP_VERSION=2.0.5-alpha SPARK_YARN=true sbt/sbt assembly
+    $ SPARK_HADOOP_VERSION=2.0.5-alpha SPARK_YARN=true sbt assembly
 
     # Cloudera CDH 4.2.0 with MapReduce v2
-    $ SPARK_HADOOP_VERSION=2.0.0-cdh4.2.0 SPARK_YARN=true sbt/sbt assembly
+    $ SPARK_HADOOP_VERSION=2.0.0-cdh4.2.0 SPARK_YARN=true sbt assembly
 
     # Apache Hadoop 2.2.X and newer
-    $ SPARK_HADOOP_VERSION=2.2.0 SPARK_YARN=true sbt/sbt assembly
+    $ SPARK_HADOOP_VERSION=2.2.0 SPARK_YARN=true sbt assembly
 
 When developing a Spark application, specify the Hadoop version by adding the
 "hadoop-client" artifact to your project's dependencies. For example, if you're

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/6be4c111/docs/README.md
----------------------------------------------------------------------
diff --git a/docs/README.md b/docs/README.md
index dfcf753..e3d6c9a 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -27,10 +27,10 @@ To mark a block of code in your markdown to be syntax highlighted by jekyll duri
 
 ## API Docs (Scaladoc and Epydoc)
 
-You can build just the Spark scaladoc by running `sbt/sbt doc` from the SPARK_PROJECT_ROOT directory.
+You can build just the Spark scaladoc by running `sbt doc` from the SPARK_PROJECT_ROOT directory.
 
 Similarly, you can build just the PySpark epydoc by running `epydoc --config epydoc.conf` from the SPARK_PROJECT_ROOT/pyspark directory.
 
-When you run `jekyll` in the docs directory, it will also copy over the scaladoc for the various Spark subprojects into the docs directory (and then also into the _site directory). We use a jekyll plugin to run `sbt/sbt doc` before building the site so if you haven't run it (recently) it may take some time as it generates all of the scaladoc.  The jekyll plugin also generates the PySpark docs using [epydoc](http://epydoc.sourceforge.net/).
+When you run `jekyll` in the docs directory, it will also copy over the scaladoc for the various Spark subprojects into the docs directory (and then also into the _site directory). We use a jekyll plugin to run `sbt doc` before building the site so if you haven't run it (recently) it may take some time as it generates all of the scaladoc.  The jekyll plugin also generates the PySpark docs using [epydoc](http://epydoc.sourceforge.net/).
 
 NOTE: To skip the step of building and copying over the Scala and Python API docs, run `SKIP_API=1 jekyll`.

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/6be4c111/docs/_plugins/copy_api_dirs.rb
----------------------------------------------------------------------
diff --git a/docs/_plugins/copy_api_dirs.rb b/docs/_plugins/copy_api_dirs.rb
index 431de90..ef9912c 100644
--- a/docs/_plugins/copy_api_dirs.rb
+++ b/docs/_plugins/copy_api_dirs.rb
@@ -26,8 +26,8 @@ if not (ENV['SKIP_API'] == '1' or ENV['SKIP_SCALADOC'] == '1')
   curr_dir = pwd
   cd("..")
 
-  puts "Running sbt/sbt doc from " + pwd + "; this may take a few minutes..."
-  puts `sbt/sbt doc`
+  puts "Running sbt doc from " + pwd + "; this may take a few minutes..."
+  puts `sbt doc`
 
   puts "Moving back into docs dir."
   cd("docs")

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/6be4c111/docs/api.md
----------------------------------------------------------------------
diff --git a/docs/api.md b/docs/api.md
index e86d077..11e2c15 100644
--- a/docs/api.md
+++ b/docs/api.md
@@ -3,7 +3,7 @@ layout: global
 title: Spark API documentation (Scaladoc)
 ---
 
-Here you can find links to the Scaladoc generated for the Spark sbt subprojects.  If the following links don't work, try running `sbt/sbt doc` from the Spark project home directory.
+Here you can find links to the Scaladoc generated for the Spark sbt subprojects.  If the following links don't work, try running `sbt doc` from the Spark project home directory.
 
 - [Spark](api/core/index.html)
 - [Spark Examples](api/examples/index.html)

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/6be4c111/docs/hadoop-third-party-distributions.md
----------------------------------------------------------------------
diff --git a/docs/hadoop-third-party-distributions.md b/docs/hadoop-third-party-distributions.md
index de6a2b0..141d475 100644
--- a/docs/hadoop-third-party-distributions.md
+++ b/docs/hadoop-third-party-distributions.md
@@ -12,7 +12,7 @@ with these distributions:
 When compiling Spark, you'll need to 
 [set the SPARK_HADOOP_VERSION flag](index.html#a-note-about-hadoop-versions):
 
-    SPARK_HADOOP_VERSION=1.0.4 sbt/sbt assembly
+    SPARK_HADOOP_VERSION=1.0.4 sbt assembly
 
 The table below lists the corresponding `SPARK_HADOOP_VERSION` code for each CDH/HDP release. Note that
 some Hadoop releases are binary compatible across client versions. This means the pre-built Spark

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/6be4c111/docs/index.md
----------------------------------------------------------------------
diff --git a/docs/index.md b/docs/index.md
index d3ac696..5278e33 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -17,7 +17,7 @@ Spark runs on both Windows and UNIX-like systems (e.g. Linux, Mac OS). All you n
 
 Spark uses [Simple Build Tool](http://www.scala-sbt.org), which is bundled with it. To compile the code, go into the top-level Spark directory and run
 
-    sbt/sbt assembly
+    sbt assembly
 
 For its Scala API, Spark {{site.SPARK_VERSION}} depends on Scala {{site.SCALA_VERSION}}. If you write applications in Scala, you will need to use this same version of Scala in your own program -- newer major versions may not work. You can get the right version of Scala from [scala-lang.org](http://www.scala-lang.org/download/).
 
@@ -56,12 +56,12 @@ Hadoop, you must build Spark against the same version that your cluster uses.
 By default, Spark links to Hadoop 1.0.4. You can change this by setting the
 `SPARK_HADOOP_VERSION` variable when compiling:
 
-    SPARK_HADOOP_VERSION=2.2.0 sbt/sbt assembly
+    SPARK_HADOOP_VERSION=2.2.0 sbt assembly
 
 In addition, if you wish to run Spark on [YARN](running-on-yarn.html), set
 `SPARK_YARN` to `true`:
 
-    SPARK_HADOOP_VERSION=2.0.5-alpha SPARK_YARN=true sbt/sbt assembly
+    SPARK_HADOOP_VERSION=2.0.5-alpha SPARK_YARN=true sbt assembly
 
 Note that on Windows, you need to set the environment variables on separate lines, e.g., `set SPARK_HADOOP_VERSION=1.2.1`.
 

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/6be4c111/docs/python-programming-guide.md
----------------------------------------------------------------------
diff --git a/docs/python-programming-guide.md b/docs/python-programming-guide.md
index 55e39b1..a33977e 100644
--- a/docs/python-programming-guide.md
+++ b/docs/python-programming-guide.md
@@ -69,7 +69,7 @@ The script automatically adds the `pyspark` package to the `PYTHONPATH`.
 The `pyspark` script launches a Python interpreter that is configured to run PySpark applications. To use `pyspark` interactively, first build Spark, then launch it directly from the command line without any options:
 
 {% highlight bash %}
-$ sbt/sbt assembly
+$ sbt assembly
 $ ./pyspark
 {% endhighlight %}
 

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/6be4c111/docs/quick-start.md
----------------------------------------------------------------------
diff --git a/docs/quick-start.md b/docs/quick-start.md
index 8f782db..5c55def 100644
--- a/docs/quick-start.md
+++ b/docs/quick-start.md
@@ -12,7 +12,7 @@ See the [programming guide](scala-programming-guide.html) for a more complete re
 To follow along with this guide, you only need to have successfully built Spark on one machine. Simply go into your Spark directory and run:
 
 {% highlight bash %}
-$ sbt/sbt assembly
+$ sbt assembly
 {% endhighlight %}
 
 # Interactive Analysis with the Spark Shell

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/6be4c111/docs/running-on-yarn.md
----------------------------------------------------------------------
diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md
index aa75ca4..13d5fd3 100644
--- a/docs/running-on-yarn.md
+++ b/docs/running-on-yarn.md
@@ -12,7 +12,7 @@ was added to Spark in version 0.6.0, and improved in 0.7.0 and 0.8.0.
 We need a consolidated Spark JAR (which bundles all the required dependencies) to run Spark jobs on a YARN cluster.
 This can be built by setting the Hadoop version and `SPARK_YARN` environment variable, as follows:
 
-    SPARK_HADOOP_VERSION=2.0.5-alpha SPARK_YARN=true ./sbt/sbt assembly
+    SPARK_HADOOP_VERSION=2.0.5-alpha SPARK_YARN=true ./sbt assembly
 
 The assembled JAR will be something like this:
 `./assembly/target/scala-{{site.SCALA_VERSION}}/spark-assembly_{{site.SPARK_VERSION}}-hadoop2.0.5.jar`.
@@ -25,7 +25,7 @@ The build process now also supports new YARN versions (2.2.x). See below.
 - The assembled jar can be installed into HDFS or used locally.
 - Your application code must be packaged into a separate JAR file.
 
-If you want to test out the YARN deployment mode, you can use the current Spark examples. A `spark-examples_{{site.SCALA_VERSION}}-{{site.SPARK_VERSION}}` file can be generated by running `sbt/sbt assembly`. NOTE: since the documentation you're reading is for Spark version {{site.SPARK_VERSION}}, we are assuming here that you have downloaded Spark {{site.SPARK_VERSION}} or checked it out of source control. If you are using a different version of Spark, the version numbers in the jar generated by the sbt package command will obviously be different.
+If you want to test out the YARN deployment mode, you can use the current Spark examples. A `spark-examples_{{site.SCALA_VERSION}}-{{site.SPARK_VERSION}}` file can be generated by running `sbt assembly`. NOTE: since the documentation you're reading is for Spark version {{site.SPARK_VERSION}}, we are assuming here that you have downloaded Spark {{site.SPARK_VERSION}} or checked it out of source control. If you are using a different version of Spark, the version numbers in the jar generated by the sbt package command will obviously be different.
 
 # Configuration
 
@@ -72,7 +72,7 @@ The command to launch the YARN Client is as follows:
 For example:
 
     # Build the Spark assembly JAR and the Spark examples JAR
-    $ SPARK_HADOOP_VERSION=2.0.5-alpha SPARK_YARN=true ./sbt/sbt assembly
+    $ SPARK_HADOOP_VERSION=2.0.5-alpha SPARK_YARN=true ./sbt assembly
 
     # Configure logging
     $ cp conf/log4j.properties.template conf/log4j.properties

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/6be4c111/docs/scala-programming-guide.md
----------------------------------------------------------------------
diff --git a/docs/scala-programming-guide.md b/docs/scala-programming-guide.md
index 56d2a3a..3e7075c 100644
--- a/docs/scala-programming-guide.md
+++ b/docs/scala-programming-guide.md
@@ -31,7 +31,7 @@ In addition, if you wish to access an HDFS cluster, you need to add a dependency
     artifactId = hadoop-client
     version = <your-hdfs-version>
 
-For other build systems, you can run `sbt/sbt assembly` to pack Spark and its dependencies into one JAR (`assembly/target/scala-{{site.SCALA_VERSION}}/spark-assembly-{{site.SPARK_VERSION}}-hadoop*.jar`), then add this to your CLASSPATH. Set the HDFS version as described [here](index.html#a-note-about-hadoop-versions).
+For other build systems, you can run `sbt assembly` to pack Spark and its dependencies into one JAR (`assembly/target/scala-{{site.SCALA_VERSION}}/spark-assembly-{{site.SPARK_VERSION}}-hadoop*.jar`), then add this to your CLASSPATH. Set the HDFS version as described [here](index.html#a-note-about-hadoop-versions).
 
 Finally, you need to import some Spark classes and implicit conversions into your program. Add the following lines:
 

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/6be4c111/make-distribution.sh
----------------------------------------------------------------------
diff --git a/make-distribution.sh b/make-distribution.sh
index 32bbdb9..a2c8e64 100755
--- a/make-distribution.sh
+++ b/make-distribution.sh
@@ -43,7 +43,13 @@ DISTDIR="$FWDIR/dist"
 
 # Get version from SBT
 export TERM=dumb   # Prevents color codes in SBT output
-VERSION=$($FWDIR/sbt/sbt "show version" | tail -1 | cut -f 2 | sed 's/^\([a-zA-Z0-9.-]*\).*/\1/')
+
+if ! test `which sbt` ;then
+    echo -e "You need sbt installed and available on path, please follow the instructions here: http://www.scala-sbt.org/release/docs/Getting-Started/Setup.html"
+    exit -1;
+fi
+
+VERSION=$(sbt "show version" | tail -1 | cut -f 2 | sed 's/^\([a-zA-Z0-9.-]*\).*/\1/')
 
 # Initialize defaults
 SPARK_HADOOP_VERSION=1.0.4
@@ -83,7 +89,9 @@ fi
 # Build fat JAR
 export SPARK_HADOOP_VERSION
 export SPARK_YARN
-"$FWDIR/sbt/sbt" "assembly/assembly"
+cd $FWDIR
+
+"sbt" "assembly/assembly"
 
 # Make directories
 rm -rf "$DISTDIR"

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/6be4c111/pyspark
----------------------------------------------------------------------
diff --git a/pyspark b/pyspark
index 12cc926..1d003e2 100755
--- a/pyspark
+++ b/pyspark
@@ -31,7 +31,7 @@ if [ ! -f "$FWDIR/RELEASE" ]; then
   ls "$FWDIR"/assembly/target/scala-$SCALA_VERSION/spark-assembly*hadoop*.jar >& /dev/null
   if [[ $? != 0 ]]; then
     echo "Failed to find Spark assembly in $FWDIR/assembly/target" >&2
-    echo "You need to build Spark with sbt/sbt assembly before running this program" >&2
+    echo "You need to build Spark with sbt assembly before running this program" >&2
     exit 1
   fi
 fi

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/6be4c111/run-example
----------------------------------------------------------------------
diff --git a/run-example b/run-example
index a78192d..fbd81fe 100755
--- a/run-example
+++ b/run-example
@@ -55,7 +55,7 @@ if [ -e "$EXAMPLES_DIR"/target/spark-examples*[0-9Tg].jar ]; then
 fi
 if [[ -z $SPARK_EXAMPLES_JAR ]]; then
   echo "Failed to find Spark examples assembly in $FWDIR/examples/target" >&2
-  echo "You need to build Spark with sbt/sbt assembly before running this program" >&2
+  echo "You need to build Spark with sbt assembly before running this program" >&2
   exit 1
 fi
 

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/6be4c111/sbt/sbt
----------------------------------------------------------------------
diff --git a/sbt/sbt b/sbt/sbt
deleted file mode 100755
index 5942280..0000000
--- a/sbt/sbt
+++ /dev/null
@@ -1,43 +0,0 @@
-#!/usr/bin/env bash
-
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-cygwin=false
-case "`uname`" in
-    CYGWIN*) cygwin=true;;
-esac
-
-EXTRA_ARGS="-Xmx1200m -XX:MaxPermSize=350m -XX:ReservedCodeCacheSize=256m"
-if [ "$MESOS_HOME" != "" ]; then
-  EXTRA_ARGS="$EXTRA_ARGS -Djava.library.path=$MESOS_HOME/lib/java"
-fi
-
-export SPARK_HOME=$(cd "$(dirname $0)/.." 2>&1 >/dev/null ; pwd)
-export SPARK_TESTING=1  # To put test classes on classpath
-
-SBT_JAR="$SPARK_HOME"/sbt/sbt-launch-*.jar
-if $cygwin; then
-    SBT_JAR=`cygpath -w $SBT_JAR`
-    export SPARK_HOME=`cygpath -w $SPARK_HOME`
-    EXTRA_ARGS="$EXTRA_ARGS -Djline.terminal=jline.UnixTerminal -Dsbt.cygwin=true"
-    stty -icanon min 1 -echo > /dev/null 2>&1
-    java $EXTRA_ARGS $SBT_OPTS -jar $SBT_JAR "$@"
-    stty icanon echo > /dev/null 2>&1
-else
-    java $EXTRA_ARGS $SBT_OPTS -jar $SBT_JAR "$@"
-fi
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/6be4c111/sbt/sbt-launch-0.11.3-2.jar
----------------------------------------------------------------------
diff --git a/sbt/sbt-launch-0.11.3-2.jar b/sbt/sbt-launch-0.11.3-2.jar
deleted file mode 100644
index 23e5c3f..0000000
Binary files a/sbt/sbt-launch-0.11.3-2.jar and /dev/null differ

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/6be4c111/sbt/sbt.cmd
----------------------------------------------------------------------
diff --git a/sbt/sbt.cmd b/sbt/sbt.cmd
deleted file mode 100644
index 681fe00..0000000
--- a/sbt/sbt.cmd
+++ /dev/null
@@ -1,25 +0,0 @@
-@echo off
-
-rem
-rem Licensed to the Apache Software Foundation (ASF) under one or more
-rem contributor license agreements.  See the NOTICE file distributed with
-rem this work for additional information regarding copyright ownership.
-rem The ASF licenses this file to You under the Apache License, Version 2.0
-rem (the "License"); you may not use this file except in compliance with
-rem the License.  You may obtain a copy of the License at
-rem
-rem    http://www.apache.org/licenses/LICENSE-2.0
-rem
-rem Unless required by applicable law or agreed to in writing, software
-rem distributed under the License is distributed on an "AS IS" BASIS,
-rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-rem See the License for the specific language governing permissions and
-rem limitations under the License.
-rem
-
-set EXTRA_ARGS=
-if not "%MESOS_HOME%x"=="x" set EXTRA_ARGS=-Djava.library.path=%MESOS_HOME%\lib\java
-
-set SPARK_HOME=%~dp0..
-
-java -Xmx1200M -XX:MaxPermSize=200m -XX:ReservedCodeCacheSize=256m %EXTRA_ARGS% -jar %SPARK_HOME%\sbt\sbt-launch-0.11.3-2.jar "%*"

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/6be4c111/spark-class
----------------------------------------------------------------------
diff --git a/spark-class b/spark-class
index 1858ea6..254ddee 100755
--- a/spark-class
+++ b/spark-class
@@ -104,7 +104,7 @@ if [ ! -f "$FWDIR/RELEASE" ]; then
   jars_list=$(ls "$FWDIR"/assembly/target/scala-$SCALA_VERSION/ | grep "spark-assembly.*hadoop.*.jar")
   if [ "$num_jars" -eq "0" ]; then
     echo "Failed to find Spark assembly in $FWDIR/assembly/target/scala-$SCALA_VERSION/" >&2
-    echo "You need to build Spark with 'sbt/sbt assembly' before running this program." >&2
+    echo "You need to build Spark with 'sbt assembly' before running this program." >&2
     exit 1
   fi
   if [ "$num_jars" -gt "1" ]; then


[10/10] git commit: Merge pull request #329 from pwendell/remove-binaries

Posted by pw...@apache.org.
Merge pull request #329 from pwendell/remove-binaries

SPARK-1002: Remove Binaries from Spark Source

This adds a few changes on top of the work by @scrapcodes.


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/10fe23bc
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/10fe23bc
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/10fe23bc

Branch: refs/heads/master
Commit: 10fe23bc34bcc8c25fb5440b71a6be883523e4dc
Parents: c4d6145 604fad9
Author: Patrick Wendell <pw...@gmail.com>
Authored: Fri Jan 3 23:50:14 2014 -0800
Committer: Patrick Wendell <pw...@gmail.com>
Committed: Fri Jan 3 23:50:14 2014 -0800

----------------------------------------------------------------------
 README.md                                       |  20 ++--
 assembly/lib/PY4J_LICENSE.txt                   |  27 -----
 assembly/lib/PY4J_VERSION.txt                   |   1 -
 assembly/lib/net/sf/py4j/py4j/0.7/py4j-0.7.jar  | Bin 103286 -> 0 bytes
 assembly/lib/net/sf/py4j/py4j/0.7/py4j-0.7.pom  |   9 --
 .../net/sf/py4j/py4j/maven-metadata-local.xml   |  12 ---
 assembly/pom.xml                                |   2 +-
 bin/pyspark                                     |   2 +-
 bin/run-example                                 |   2 +-
 bin/spark-class                                 |   2 +-
 .../scala/org/apache/spark/SparkContext.scala   |   8 +-
 .../test/resources/uncommons-maths-1.2.2.jar    | Bin 49019 -> 0 bytes
 .../scala/org/apache/spark/DriverSuite.scala    |   8 +-
 .../org/apache/spark/FileServerSuite.scala      | 108 ++++++++++---------
 .../deploy/worker/ExecutorRunnerTest.scala      |   4 +-
 docs/README.md                                  |   4 +-
 docs/_plugins/copy_api_dirs.rb                  |   4 +-
 docs/api.md                                     |   2 +-
 docs/hadoop-third-party-distributions.md        |   2 +-
 docs/index.md                                   |   6 +-
 docs/python-programming-guide.md                |   2 +-
 docs/quick-start.md                             |   2 +-
 docs/running-on-yarn.md                         |   6 +-
 docs/scala-programming-guide.md                 |   2 +-
 make-distribution.sh                            |  13 ++-
 project/SparkBuild.scala                        |  16 ++-
 python/lib/py4j-0.8.1-src.zip                   | Bin 0 -> 37662 bytes
 python/lib/py4j0.7.egg                          | Bin 191756 -> 0 bytes
 python/pyspark/__init__.py                      |   2 +-
 .../org/apache/spark/repl/SparkILoop.scala      |   4 +-
 sbt/sbt                                         |  43 --------
 sbt/sbt-launch-0.11.3-2.jar                     | Bin 1096763 -> 0 bytes
 sbt/sbt.cmd                                     |  25 -----
 33 files changed, 128 insertions(+), 210 deletions(-)
----------------------------------------------------------------------



[08/10] git commit: Changes on top of Prashant's patch.

Posted by pw...@apache.org.
Changes on top of Prashant's patch.

Closes #316


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/9e6f3bdc
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/9e6f3bdc
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/9e6f3bdc

Branch: refs/heads/master
Commit: 9e6f3bdcda1ab48159afa4f54b64d05e42a8688e
Parents: bc311bb
Author: Patrick Wendell <pw...@gmail.com>
Authored: Fri Jan 3 17:32:25 2014 -0800
Committer: Patrick Wendell <pw...@gmail.com>
Committed: Fri Jan 3 18:30:17 2014 -0800

----------------------------------------------------------------------
 README.md                                       |  19 +----
 .../scala/org/apache/spark/SparkContext.scala   |   3 +-
 .../scala/org/apache/spark/DriverSuite.scala    |   3 +-
 .../org/apache/spark/FileServerSuite.scala      |  77 ++++++++-----------
 .../deploy/worker/ExecutorRunnerTest.scala      |   6 +-
 make-distribution.sh                            |   3 +-
 project/SparkBuild.scala                        |   1 +
 python/lib/py4j-0.8.1-src.zip                   | Bin 0 -> 37662 bytes
 python/lib/py4j-0.8.1.zip                       | Bin 809541 -> 0 bytes
 python/pyspark/__init__.py                      |   2 +-
 10 files changed, 42 insertions(+), 72 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/9e6f3bdc/README.md
----------------------------------------------------------------------
diff --git a/README.md b/README.md
index 8f68674..873ec98 100644
--- a/README.md
+++ b/README.md
@@ -13,7 +13,7 @@ This README file only contains basic setup instructions.
 ## Building
 
 Spark requires Scala 2.10. The project is built using Simple Build Tool (SBT),
-which can be obtained from [here](http://www.scala-sbt.org). To build Spark and its example programs, run:
+which can be obtained [here](http://www.scala-sbt.org). To build Spark and its example programs, run:
 
     sbt assembly
 
@@ -38,24 +38,11 @@ locally with one thread, or "local[N]" to run locally with N threads.
 
 ## Running tests
 
-### With sbt (Much faster to run compared to maven)
-Once you have built spark with `sbt assembly` mentioned in [Building](#Building) section. Test suits can be run as follows using sbt.
+Testing first requires [Building](#Building) Spark. Once Spark is built, tests
+can be run using:
 
 `sbt test`
  
-### With maven.
-1. Export these necessary environment variables as follows.
-
- `export SCALA_HOME=<scala distribution>`
-
- `export MAVEN_OPTS="-Xmx1512m -XX:MaxPermSize=512m"`
-
-2. Build assembly by
-`mvn package -DskipTests`
-
-3. Run tests
-`mvn test`
-
 ## A Note About Hadoop Versions
 
 Spark uses the Hadoop core library to talk to HDFS and other Hadoop-supported

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/9e6f3bdc/core/src/main/scala/org/apache/spark/SparkContext.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala
index c6f6883..e80e43a 100644
--- a/core/src/main/scala/org/apache/spark/SparkContext.scala
+++ b/core/src/main/scala/org/apache/spark/SparkContext.scala
@@ -173,7 +173,8 @@ class SparkContext(
       value <- Option(System.getenv(key))) {
     executorEnvs(key) = value
   }
-  // A workaround for SPARK_TESTING and SPARK_HOME
+  // Convert java options to env vars as a work around
+  // since we can't set env vars directly in sbt.
   for { (envKey, propKey) <- Seq(("SPARK_HOME", "spark.home"), ("SPARK_TESTING", "spark.testing"))
     value <- Option(System.getenv(envKey)).orElse(Option(System.getProperty(propKey)))} {
     executorEnvs(envKey) = value

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/9e6f3bdc/core/src/test/scala/org/apache/spark/DriverSuite.scala
----------------------------------------------------------------------
diff --git a/core/src/test/scala/org/apache/spark/DriverSuite.scala b/core/src/test/scala/org/apache/spark/DriverSuite.scala
index 89c5631..7e1e55f 100644
--- a/core/src/test/scala/org/apache/spark/DriverSuite.scala
+++ b/core/src/test/scala/org/apache/spark/DriverSuite.scala
@@ -30,8 +30,7 @@ import org.apache.spark.util.Utils
 
 class DriverSuite extends FunSuite with Timeouts {
   test("driver should exit after finishing") {
-    val sparkHome = Option(System.getenv("SPARK_HOME"))
-      .orElse(Option(System.getProperty("spark.home"))).get
+    val sparkHome = sys.env.get("SPARK_HOME").orElse(sys.props.get("spark.home")).get
     // Regression test for SPARK-530: "Spark driver process doesn't exit after finishing"
     val masters = Table(("master"), ("local"), ("local-cluster[2,1,512]"))
     forAll(masters) { (master: String) =>

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/9e6f3bdc/core/src/test/scala/org/apache/spark/FileServerSuite.scala
----------------------------------------------------------------------
diff --git a/core/src/test/scala/org/apache/spark/FileServerSuite.scala b/core/src/test/scala/org/apache/spark/FileServerSuite.scala
index 506f748..a2eb9a4 100644
--- a/core/src/test/scala/org/apache/spark/FileServerSuite.scala
+++ b/core/src/test/scala/org/apache/spark/FileServerSuite.scala
@@ -27,54 +27,39 @@ import org.scalatest.FunSuite
 class FileServerSuite extends FunSuite with LocalSparkContext {
 
   @transient var tmpFile: File = _
-  @transient var testJarFile: String = _
-
+  @transient var tmpJarUrl: String = _
 
   override def beforeAll() {
     super.beforeAll()
-    val buffer = new Array[Byte](10240)
-    val tmpdir = new File(Files.createTempDir(), "test")
-    tmpdir.mkdir()
-    val tmpJarEntry = new File(tmpdir, "FileServerSuite2.txt")
-    val pw = new PrintWriter(tmpJarEntry)
-    pw.println("test String in the file named FileServerSuite2.txt")
+    val tmpDir = new File(Files.createTempDir(), "test")
+    tmpDir.mkdir()
+
+    val textFile = new File(tmpDir, "FileServerSuite.txt")
+    val pw = new PrintWriter(textFile)
+    pw.println("100")
     pw.close()
-    // The ugliest code possible, was translated from java.
-    val tmpFile2 = new File(tmpdir, "test.jar")
-    val stream = new FileOutputStream(tmpFile2)
-    val jar = new JarOutputStream(stream, new java.util.jar.Manifest())
-    val jarAdd = new JarEntry(tmpJarEntry.getName)
-    jarAdd.setTime(tmpJarEntry.lastModified)
-    jar.putNextEntry(jarAdd)
-    val in = new FileInputStream(tmpJarEntry)
+    
+    val jarFile = new File(tmpDir, "test.jar")
+    val jarStream = new FileOutputStream(jarFile)
+    val jar = new JarOutputStream(jarStream, new java.util.jar.Manifest())
+
+    val jarEntry = new JarEntry(textFile.getName)
+    jar.putNextEntry(jarEntry)
+    
+    val in = new FileInputStream(textFile)
+    val buffer = new Array[Byte](10240)
     var nRead = 0
-      while (nRead <= 0) {
+    while (nRead <= 0) {
       nRead = in.read(buffer, 0, buffer.length)
       jar.write(buffer, 0, nRead)
     }
+
     in.close()
     jar.close()
-    stream.close()
-    testJarFile = tmpFile2.toURI.toURL.toString
-  }
-
-  override def beforeEach() {
-    super.beforeEach()
-    // Create a sample text file
-    val tmpdir = new File(Files.createTempDir(), "test")
-    tmpdir.mkdir()
-    tmpFile = new File(tmpdir, "FileServerSuite.txt")
-    val pw = new PrintWriter(tmpFile)
-    pw.println("100")
-    pw.close()
-  }
+    jarStream.close()
 
-  override def afterEach() {
-    super.afterEach()
-    // Clean up downloaded file
-    if (tmpFile.exists) {
-      tmpFile.delete()
-    }
+    tmpFile = textFile
+    tmpJarUrl = jarFile.toURI.toURL.toString
   }
 
   test("Distributing files locally") {
@@ -108,10 +93,10 @@ class FileServerSuite extends FunSuite with LocalSparkContext {
 
   test ("Dynamically adding JARS locally") {
     sc = new SparkContext("local[4]", "test")
-    sc.addJar(testJarFile)
+    sc.addJar(tmpJarUrl)
     val testData = Array((1, 1))
-    sc.parallelize(testData).foreach { (x) =>
-      if (Thread.currentThread.getContextClassLoader.getResource("FileServerSuite2.txt") == null) {
+    sc.parallelize(testData).foreach { x =>
+      if (Thread.currentThread.getContextClassLoader.getResource("FileServerSuite.txt") == null) {
         throw new SparkException("jar not added")
       }
     }
@@ -133,10 +118,10 @@ class FileServerSuite extends FunSuite with LocalSparkContext {
 
   test ("Dynamically adding JARS on a standalone cluster") {
     sc = new SparkContext("local-cluster[1,1,512]", "test")
-    sc.addJar(testJarFile)
+    sc.addJar(tmpJarUrl)
     val testData = Array((1,1))
-    sc.parallelize(testData).foreach { (x) =>
-      if (Thread.currentThread.getContextClassLoader.getResource("FileServerSuite2.txt") == null) {
+    sc.parallelize(testData).foreach { x =>
+      if (Thread.currentThread.getContextClassLoader.getResource("FileServerSuite.txt") == null) {
         throw new SparkException("jar not added")
       }
     }
@@ -144,10 +129,10 @@ class FileServerSuite extends FunSuite with LocalSparkContext {
 
   test ("Dynamically adding JARS on a standalone cluster using local: URL") {
     sc = new SparkContext("local-cluster[1,1,512]", "test")
-    sc.addJar(testJarFile.replace("file", "local"))
+    sc.addJar(tmpJarUrl.replace("file", "local"))
     val testData = Array((1,1))
-    sc.parallelize(testData).foreach { (x) =>
-      if (Thread.currentThread.getContextClassLoader.getResource("FileServerSuite2.txt") == null) {
+    sc.parallelize(testData).foreach { x =>
+      if (Thread.currentThread.getContextClassLoader.getResource("FileServerSuite.txt") == null) {
         throw new SparkException("jar not added")
       }
     }

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/9e6f3bdc/core/src/test/scala/org/apache/spark/deploy/worker/ExecutorRunnerTest.scala
----------------------------------------------------------------------
diff --git a/core/src/test/scala/org/apache/spark/deploy/worker/ExecutorRunnerTest.scala b/core/src/test/scala/org/apache/spark/deploy/worker/ExecutorRunnerTest.scala
index 894a722..f58b1ee 100644
--- a/core/src/test/scala/org/apache/spark/deploy/worker/ExecutorRunnerTest.scala
+++ b/core/src/test/scala/org/apache/spark/deploy/worker/ExecutorRunnerTest.scala
@@ -19,18 +19,14 @@ package org.apache.spark.deploy.worker
 
 import java.io.File
 
-import scala.util.Try
-
 import org.scalatest.FunSuite
 
 import org.apache.spark.deploy.{ExecutorState, Command, ApplicationDescription}
 
-
 class ExecutorRunnerTest extends FunSuite {
   test("command includes appId") {
     def f(s:String) = new File(s)
-    val sparkHome = Try(sys.env("SPARK_HOME")).toOption
-      .orElse(Option(System.getProperty("spark.home"))).get
+    val sparkHome = sys.env.get("SPARK_HOME").orElse(sys.env.get("spark.home")).get
     val appDesc = new ApplicationDescription("app name", 8, 500, Command("foo", Seq(),Map()),
       sparkHome, "appUiUrl")
     val appId = "12345-worker321-9876"

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/9e6f3bdc/make-distribution.sh
----------------------------------------------------------------------
diff --git a/make-distribution.sh b/make-distribution.sh
index a2c8e64..8ae8a4c 100755
--- a/make-distribution.sh
+++ b/make-distribution.sh
@@ -45,7 +45,8 @@ DISTDIR="$FWDIR/dist"
 export TERM=dumb   # Prevents color codes in SBT output
 
 if ! test `which sbt` ;then
-    echo -e "You need sbt installed and available on path, please follow the instructions here: http://www.scala-sbt.org/release/docs/Getting-Started/Setup.html"
+    echo -e "You need sbt installed and available on your path."
+    echo -e "Download sbt from http://www.scala-sbt.org/"
     exit -1;
 fi
 

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/9e6f3bdc/project/SparkBuild.scala
----------------------------------------------------------------------
diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala
index 8290e7c..5f57c96 100644
--- a/project/SparkBuild.scala
+++ b/project/SparkBuild.scala
@@ -72,6 +72,7 @@ object SparkBuild extends Build {
   val sparkHome = System.getProperty("user.dir")
   System.setProperty("spark.home", sparkHome)
   System.setProperty("spark.testing", "1")
+  
   // Allows build configuration to be set through environment variables
   lazy val hadoopVersion = Properties.envOrElse("SPARK_HADOOP_VERSION", DEFAULT_HADOOP_VERSION)
   lazy val isNewHadoop = Properties.envOrNone("SPARK_IS_NEW_HADOOP") match {

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/9e6f3bdc/python/lib/py4j-0.8.1-src.zip
----------------------------------------------------------------------
diff --git a/python/lib/py4j-0.8.1-src.zip b/python/lib/py4j-0.8.1-src.zip
new file mode 100644
index 0000000..2069a32
Binary files /dev/null and b/python/lib/py4j-0.8.1-src.zip differ

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/9e6f3bdc/python/lib/py4j-0.8.1.zip
----------------------------------------------------------------------
diff --git a/python/lib/py4j-0.8.1.zip b/python/lib/py4j-0.8.1.zip
deleted file mode 100644
index 3231e31..0000000
Binary files a/python/lib/py4j-0.8.1.zip and /dev/null differ

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/9e6f3bdc/python/pyspark/__init__.py
----------------------------------------------------------------------
diff --git a/python/pyspark/__init__.py b/python/pyspark/__init__.py
index 2b2c3a0..a51d5af 100644
--- a/python/pyspark/__init__.py
+++ b/python/pyspark/__init__.py
@@ -40,7 +40,7 @@ Public classes:
 
 import sys
 import os
-sys.path.insert(0, os.path.join(os.environ["SPARK_HOME"], "python/lib/py4j0.7.egg"))
+sys.path.insert(0, os.path.join(os.environ["SPARK_HOME"], "python/lib/py4j-0.8.1-src.zip"))
 
 
 from pyspark.conf import SparkConf


[05/10] git commit: Merge branch 'master' into spark-1002-remove-jars

Posted by pw...@apache.org.
Merge branch 'master' into spark-1002-remove-jars


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/b4bb8000
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/b4bb8000
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/b4bb8000

Branch: refs/heads/master
Commit: b4bb80002bbf0ac3642c78ae9e5c260b5da4a4cc
Parents: 08ec10d 498a5f0
Author: Prashant Sharma <pr...@imaginea.com>
Authored: Fri Jan 3 12:12:04 2014 +0530
Committer: Prashant Sharma <pr...@imaginea.com>
Committed: Fri Jan 3 12:12:04 2014 +0530

----------------------------------------------------------------------
 .../scala/org/apache/spark/Accumulators.scala   |   8 +-
 .../org/apache/spark/MapOutputTracker.scala     |  11 +-
 .../scala/org/apache/spark/Partitioner.scala    |   4 +-
 .../main/scala/org/apache/spark/SparkConf.scala | 189 ++++++++++
 .../scala/org/apache/spark/SparkContext.scala   | 267 +++++++++-----
 .../main/scala/org/apache/spark/SparkEnv.scala  |  54 +--
 .../spark/api/java/JavaSparkContext.scala       |  35 +-
 .../org/apache/spark/api/python/PythonRDD.scala |   4 +-
 .../org/apache/spark/broadcast/Broadcast.scala  |   8 +-
 .../spark/broadcast/BroadcastFactory.scala      |   4 +-
 .../apache/spark/broadcast/HttpBroadcast.scala  |  43 ++-
 .../spark/broadcast/TorrentBroadcast.scala      |  45 +--
 .../spark/deploy/FaultToleranceTest.scala       |   4 +-
 .../apache/spark/deploy/LocalSparkCluster.scala |   7 +-
 .../apache/spark/deploy/SparkHadoopUtil.scala   |  14 +-
 .../org/apache/spark/deploy/client/Client.scala |  13 +-
 .../apache/spark/deploy/client/TestClient.scala |  10 +-
 .../org/apache/spark/deploy/master/Master.scala |  41 ++-
 .../spark/deploy/master/MasterArguments.scala   |  11 +-
 .../deploy/master/SparkZooKeeperSession.scala   |   7 +-
 .../master/ZooKeeperLeaderElectionAgent.scala   |   9 +-
 .../master/ZooKeeperPersistenceEngine.scala     |   8 +-
 .../spark/deploy/master/ui/MasterWebUI.scala    |   2 +-
 .../org/apache/spark/deploy/worker/Worker.scala |  34 +-
 .../spark/deploy/worker/ui/WorkerWebUI.scala    |   6 +-
 .../executor/CoarseGrainedExecutorBackend.scala |   6 +-
 .../org/apache/spark/executor/Executor.scala    |  45 +--
 .../org/apache/spark/io/CompressionCodec.scala  |  19 +-
 .../apache/spark/metrics/MetricsSystem.scala    |  10 +-
 .../spark/network/ConnectionManager.scala       |  22 +-
 .../org/apache/spark/network/ReceiverTest.scala |  12 +-
 .../org/apache/spark/network/SenderTest.scala   |  16 +-
 .../spark/network/netty/ShuffleCopier.scala     |  10 +-
 .../org/apache/spark/rdd/CheckpointRDD.scala    |   5 +-
 .../org/apache/spark/rdd/CoGroupedRDD.scala     |   2 +-
 .../spark/rdd/PartitionerAwareUnionRDD.scala    | 110 ++++++
 .../main/scala/org/apache/spark/rdd/RDD.scala   |   3 +-
 .../apache/spark/rdd/RDDCheckpointData.scala    |   2 +-
 .../org/apache/spark/rdd/ShuffledRDD.scala      |   2 +-
 .../org/apache/spark/rdd/SubtractedRDD.scala    |   2 +-
 .../apache/spark/scheduler/DAGScheduler.scala   |   3 +-
 .../spark/scheduler/InputFormatInfo.scala       |  14 +-
 .../org/apache/spark/scheduler/ResultTask.scala |   4 +-
 .../spark/scheduler/SchedulableBuilder.scala    |   6 +-
 .../spark/scheduler/SchedulerBackend.scala      |   3 -
 .../apache/spark/scheduler/ShuffleMapTask.scala |   6 +-
 .../spark/scheduler/TaskResultGetter.scala      |   3 +-
 .../spark/scheduler/TaskSchedulerImpl.scala     |  25 +-
 .../apache/spark/scheduler/TaskSetManager.scala |  23 +-
 .../cluster/CoarseGrainedSchedulerBackend.scala |  20 +-
 .../cluster/SimrSchedulerBackend.scala          |   4 +-
 .../cluster/SparkDeploySchedulerBackend.scala   |   8 +-
 .../mesos/CoarseMesosSchedulerBackend.scala     |  14 +-
 .../cluster/mesos/MesosSchedulerBackend.scala   |   8 +-
 .../spark/scheduler/local/LocalBackend.scala    |   3 +-
 .../spark/serializer/JavaSerializer.scala       |   3 +-
 .../spark/serializer/KryoSerializer.scala       |  14 +-
 .../spark/serializer/SerializerManager.scala    |  12 +-
 .../spark/storage/BlockFetcherIterator.scala    |   4 +-
 .../org/apache/spark/storage/BlockManager.scala |  58 +--
 .../spark/storage/BlockManagerMaster.scala      |  11 +-
 .../spark/storage/BlockManagerMasterActor.scala |  14 +-
 .../spark/storage/BlockObjectWriter.scala       |   5 +-
 .../apache/spark/storage/DiskBlockManager.scala |   2 +-
 .../spark/storage/ShuffleBlockManager.scala     |  10 +-
 .../spark/storage/StoragePerfTester.scala       |   2 +-
 .../apache/spark/storage/ThreadingTest.scala    |   8 +-
 .../scala/org/apache/spark/ui/SparkUI.scala     |   4 +-
 .../apache/spark/ui/UIWorkloadGenerator.scala   |  17 +-
 .../org/apache/spark/ui/env/EnvironmentUI.scala |  15 +-
 .../spark/ui/jobs/JobProgressListener.scala     |   4 +-
 .../scala/org/apache/spark/util/AkkaUtils.scala |  25 +-
 .../org/apache/spark/util/MetadataCleaner.scala |  35 +-
 .../org/apache/spark/util/SizeEstimator.scala   |  14 +-
 .../scala/org/apache/spark/util/Utils.scala     |  25 +-
 core/src/test/resources/spark.conf              |   8 +
 .../org/apache/spark/CheckpointSuite.scala      | 361 +++++++++++--------
 .../apache/spark/MapOutputTrackerSuite.scala    |  16 +-
 .../org/apache/spark/SharedSparkContext.scala   |   4 +-
 .../scala/org/apache/spark/SparkConfSuite.scala | 110 ++++++
 .../apache/spark/io/CompressionCodecSuite.scala |   8 +-
 .../spark/metrics/MetricsSystemSuite.scala      |   8 +-
 .../scala/org/apache/spark/rdd/RDDSuite.scala   |  27 ++
 .../spark/scheduler/ClusterSchedulerSuite.scala |   2 +-
 .../spark/scheduler/DAGSchedulerSuite.scala     |  23 +-
 .../apache/spark/scheduler/JobLoggerSuite.scala |   2 +-
 .../spark/scheduler/TaskResultGetterSuite.scala |   6 +-
 .../spark/scheduler/TaskSetManagerSuite.scala   |   4 +-
 .../spark/serializer/KryoSerializerSuite.scala  |  29 +-
 .../spark/storage/BlockManagerSuite.scala       |  97 ++---
 .../spark/storage/DiskBlockManagerSuite.scala   |  18 +-
 .../apache/spark/util/SizeEstimatorSuite.scala  |   2 +-
 docs/_config.yml                                |   2 +-
 docs/configuration.md                           |  71 +++-
 docs/css/bootstrap.min.css                      |   2 +-
 docs/job-scheduling.md                          |  21 +-
 docs/monitoring.md                              |   3 +-
 docs/python-programming-guide.md                |  15 +-
 docs/quick-start.md                             |  52 ++-
 docs/running-on-mesos.md                        |  19 +-
 docs/scala-programming-guide.md                 |   4 +-
 docs/spark-standalone.md                        |  15 +-
 docs/streaming-programming-guide.md             |   4 +-
 docs/tuning.md                                  |  21 +-
 .../examples/bagel/WikipediaPageRank.scala      |  10 +-
 .../bagel/WikipediaPageRankStandalone.scala     |   8 +-
 .../streaming/examples/ActorWordCount.scala     |   3 +-
 .../apache/spark/mllib/recommendation/ALS.scala |  13 +-
 .../spark/deploy/yarn/ApplicationMaster.scala   |  84 ++---
 .../org/apache/spark/deploy/yarn/Client.scala   |  42 ++-
 .../spark/deploy/yarn/ClientArguments.scala     |   3 +-
 .../spark/deploy/yarn/WorkerLauncher.scala      |  13 +-
 .../deploy/yarn/YarnAllocationHandler.scala     |  27 +-
 .../cluster/YarnClientSchedulerBackend.scala    |   4 +-
 project/SparkBuild.scala                        |   1 +
 python/epydoc.conf                              |   2 +-
 python/pyspark/__init__.py                      |  32 +-
 python/pyspark/broadcast.py                     |  11 +
 python/pyspark/conf.py                          | 171 +++++++++
 python/pyspark/context.py                       |  59 ++-
 python/pyspark/java_gateway.py                  |   1 +
 python/pyspark/rdd.py                           |  66 +++-
 python/run-tests                                |   3 +-
 .../org/apache/spark/repl/SparkILoop.scala      |  17 +-
 .../org/apache/spark/repl/SparkIMain.scala      |   7 +-
 .../org/apache/spark/streaming/Checkpoint.scala |  18 +-
 .../org/apache/spark/streaming/DStream.scala    |   2 +-
 .../spark/streaming/PairDStreamFunctions.scala  |  13 +-
 .../spark/streaming/StreamingContext.scala      |  55 ++-
 .../streaming/api/java/JavaPairDStream.scala    |  18 +-
 .../api/java/JavaStreamingContext.scala         |   9 +
 .../streaming/dstream/NetworkInputDStream.scala |   6 +-
 .../streaming/dstream/ShuffledDStream.scala     |   9 +-
 .../streaming/dstream/WindowedDStream.scala     |  16 +-
 .../streaming/scheduler/JobGenerator.scala      |   8 +-
 .../streaming/scheduler/JobScheduler.scala      |   4 +-
 .../spark/streaming/util/RawTextSender.scala    |   4 +-
 .../apache/spark/streaming/JavaAPISuite.java    |  10 +-
 .../spark/streaming/BasicOperationsSuite.scala  |   8 +-
 .../spark/streaming/CheckpointSuite.scala       |  15 +-
 .../spark/streaming/InputStreamsSuite.scala     |  18 +-
 .../apache/spark/streaming/TestSuiteBase.scala  |  34 +-
 .../spark/streaming/WindowOperationsSuite.scala |   5 +-
 .../spark/deploy/yarn/ApplicationMaster.scala   |  97 ++---
 .../org/apache/spark/deploy/yarn/Client.scala   |  54 +--
 .../spark/deploy/yarn/ClientArguments.scala     |   3 +-
 .../spark/deploy/yarn/WorkerLauncher.scala      |  16 +-
 .../deploy/yarn/YarnAllocationHandler.scala     |  25 +-
 .../cluster/YarnClientSchedulerBackend.scala    |   4 +-
 149 files changed, 2291 insertions(+), 1221 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/b4bb8000/docs/python-programming-guide.md
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/b4bb8000/docs/quick-start.md
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/b4bb8000/docs/scala-programming-guide.md
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/b4bb8000/project/SparkBuild.scala
----------------------------------------------------------------------


[06/10] git commit: fixed review comments

Posted by pw...@apache.org.
fixed review comments


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/94f2fffa
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/94f2fffa
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/94f2fffa

Branch: refs/heads/master
Commit: 94f2fffa23436ed66a24c705f88dabe59bf54037
Parents: b4bb800
Author: Prashant Sharma <pr...@imaginea.com>
Authored: Fri Jan 3 14:37:42 2014 +0530
Committer: Prashant Sharma <pr...@imaginea.com>
Committed: Fri Jan 3 14:43:37 2014 +0530

----------------------------------------------------------------------
 README.md                                       |  19 +++++++++++--------
 .../scala/org/apache/spark/SparkContext.scala   |   7 ++++++-
 .../scala/org/apache/spark/DriverSuite.scala    |   9 ++++++---
 .../deploy/worker/ExecutorRunnerTest.scala      |   8 +++++++-
 docs/running-on-yarn.md                         |   4 ++--
 project/SparkBuild.scala                        |  14 +++++++++-----
 python/lib/py4j-0.8.1.zip                       | Bin 0 -> 809541 bytes
 python/lib/py4j0.7.egg                          | Bin 191756 -> 0 bytes
 .../org/apache/spark/repl/SparkILoop.scala      |   4 +++-
 9 files changed, 44 insertions(+), 21 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/94f2fffa/README.md
----------------------------------------------------------------------
diff --git a/README.md b/README.md
index 22e7ab8..8f68674 100644
--- a/README.md
+++ b/README.md
@@ -13,7 +13,7 @@ This README file only contains basic setup instructions.
 ## Building
 
 Spark requires Scala 2.10. The project is built using Simple Build Tool (SBT),
-which can be obtained from [here](http://www.scala-sbt.org/release/docs/Getting-Started/Setup.html). To build Spark and its example programs, run:
+which can be obtained from [here](http://www.scala-sbt.org). To build Spark and its example programs, run:
 
     sbt assembly
 
@@ -38,19 +38,22 @@ locally with one thread, or "local[N]" to run locally with N threads.
 
 ## Running tests
 
-### With sbt. (you need sbt installed)
-Once you have built spark with `sbt assembly` mentioned in [Building](#Building) section. Test suits can be run as follows on *nix based systems using sbt.
+### With sbt (much faster to run than Maven)
+Once you have built Spark with `sbt assembly` as described in the [Building](#Building) section, test suites can be run as follows using sbt.
 
-`SPARK_HOME=$(pwd) SPARK_TESTING=1 sbt test`
- 
-TODO: figure out instructions for windows.
+`sbt test`
  
 ### With maven.
+1. Export these necessary environment variables as follows.
+
+ `export SCALA_HOME=<scala distribution>`
+
+ `export MAVEN_OPTS="-Xmx1512m -XX:MaxPermSize=512m"`
 
-1. Build assembly by
+2. Build assembly by
 `mvn package -DskipTests`
 
-2. Run tests
+3. Run tests
 `mvn test`
 
 ## A Note About Hadoop Versions

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/94f2fffa/core/src/main/scala/org/apache/spark/SparkContext.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala
index 4d6a97e..c6f6883 100644
--- a/core/src/main/scala/org/apache/spark/SparkContext.scala
+++ b/core/src/main/scala/org/apache/spark/SparkContext.scala
@@ -169,10 +169,15 @@ class SparkContext(
   // Environment variables to pass to our executors
   private[spark] val executorEnvs = HashMap[String, String]()
   // Note: SPARK_MEM is included for Mesos, but overwritten for standalone mode in ExecutorRunner
-  for (key <- Seq("SPARK_CLASSPATH", "SPARK_LIBRARY_PATH", "SPARK_JAVA_OPTS", "SPARK_TESTING");
+  for (key <- Seq("SPARK_CLASSPATH", "SPARK_LIBRARY_PATH", "SPARK_JAVA_OPTS");
       value <- Option(System.getenv(key))) {
     executorEnvs(key) = value
   }
+  // A workaround for SPARK_TESTING and SPARK_HOME
+  for { (envKey, propKey) <- Seq(("SPARK_HOME", "spark.home"), ("SPARK_TESTING", "spark.testing"))
+    value <- Option(System.getenv(envKey)).orElse(Option(System.getProperty(propKey)))} {
+    executorEnvs(envKey) = value
+  }
   // Since memory can be set with a system property too, use that
   executorEnvs("SPARK_MEM") = executorMemory + "m"
   executorEnvs ++= conf.getExecutorEnv
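
For illustration only (not part of the commit): a minimal, self-contained sketch of the environment-variable/system-property fallback introduced above, assuming nothing beyond the Scala standard library; the object and method names are hypothetical.

    import scala.collection.mutable.HashMap

    // Hypothetical sketch: prefer the environment variable, fall back to the JVM
    // system property, and only record a value when one of the two is actually set.
    object EnvOrPropFallback {
      def lookup(envKey: String, propKey: String): Option[String] =
        Option(System.getenv(envKey)).orElse(Option(System.getProperty(propKey)))

      def main(args: Array[String]): Unit = {
        val executorEnvs = HashMap[String, String]()
        for {
          (envKey, propKey) <- Seq(("SPARK_HOME", "spark.home"), ("SPARK_TESTING", "spark.testing"))
          value <- lookup(envKey, propKey)
        } executorEnvs(envKey) = value
        println(executorEnvs)  // e.g. Map(SPARK_HOME -> /path/to/spark) when only SPARK_HOME is set
      }
    }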

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/94f2fffa/core/src/test/scala/org/apache/spark/DriverSuite.scala
----------------------------------------------------------------------
diff --git a/core/src/test/scala/org/apache/spark/DriverSuite.scala b/core/src/test/scala/org/apache/spark/DriverSuite.scala
index 6d1695e..89c5631 100644
--- a/core/src/test/scala/org/apache/spark/DriverSuite.scala
+++ b/core/src/test/scala/org/apache/spark/DriverSuite.scala
@@ -30,13 +30,16 @@ import org.apache.spark.util.Utils
 
 class DriverSuite extends FunSuite with Timeouts {
   test("driver should exit after finishing") {
-    assert(System.getenv("SPARK_HOME") != null)
+    val sparkHome = Option(System.getenv("SPARK_HOME"))
+      .orElse(Option(System.getProperty("spark.home"))).get
     // Regression test for SPARK-530: "Spark driver process doesn't exit after finishing"
     val masters = Table(("master"), ("local"), ("local-cluster[2,1,512]"))
     forAll(masters) { (master: String) =>
       failAfter(60 seconds) {
-        Utils.execute(Seq("./spark-class", "org.apache.spark.DriverWithoutCleanup", master),
-          new File(System.getenv("SPARK_HOME")))
+        Utils.executeAndGetOutput(
+          Seq("./spark-class", "org.apache.spark.DriverWithoutCleanup", master),
+          new File(sparkHome), 
+          Map("SPARK_TESTING" -> "1", "SPARK_HOME" -> sparkHome))
       }
     }
   }
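
The Utils.executeAndGetOutput call above launches the driver in a child process with an explicit working directory and extra environment variables (SPARK_TESTING, SPARK_HOME). The following is a rough stand-in showing that mechanism with plain java.lang.ProcessBuilder; it is not Spark's implementation, and the helper name is made up.

    import java.io.File

    import scala.io.Source

    // Hypothetical helper: run a command in workingDir with extra environment
    // variables merged into the inherited environment, and return its stdout.
    object RunAndCapture {
      def run(command: Seq[String], workingDir: File, extraEnv: Map[String, String]): String = {
        val builder = new ProcessBuilder(command: _*)
        builder.directory(workingDir)
        builder.redirectErrorStream(true)                  // fold stderr into stdout
        val env = builder.environment()                    // child inherits the parent env...
        extraEnv.foreach { case (k, v) => env.put(k, v) }  // ...plus these overrides
        val process = builder.start()
        val output = Source.fromInputStream(process.getInputStream).mkString
        process.waitFor()
        output
      }
    }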

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/94f2fffa/core/src/test/scala/org/apache/spark/deploy/worker/ExecutorRunnerTest.scala
----------------------------------------------------------------------
diff --git a/core/src/test/scala/org/apache/spark/deploy/worker/ExecutorRunnerTest.scala b/core/src/test/scala/org/apache/spark/deploy/worker/ExecutorRunnerTest.scala
index 4cb4ddc..894a722 100644
--- a/core/src/test/scala/org/apache/spark/deploy/worker/ExecutorRunnerTest.scala
+++ b/core/src/test/scala/org/apache/spark/deploy/worker/ExecutorRunnerTest.scala
@@ -18,13 +18,19 @@
 package org.apache.spark.deploy.worker
 
 import java.io.File
+
+import scala.util.Try
+
 import org.scalatest.FunSuite
+
 import org.apache.spark.deploy.{ExecutorState, Command, ApplicationDescription}
 
+
 class ExecutorRunnerTest extends FunSuite {
   test("command includes appId") {
     def f(s:String) = new File(s)
-    val sparkHome = sys.env("SPARK_HOME")
+    val sparkHome = Try(sys.env("SPARK_HOME")).toOption
+      .orElse(Option(System.getProperty("spark.home"))).get
     val appDesc = new ApplicationDescription("app name", 8, 500, Command("foo", Seq(),Map()),
       sparkHome, "appUiUrl")
     val appId = "12345-worker321-9876"
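
A short note on the lookup above: sys.env("SPARK_HOME") throws NoSuchElementException when the variable is unset, so Try(...).toOption turns that failure into an Option before falling back to the spark.home system property set by the build. A minimal sketch with a hypothetical object name:

    import scala.util.Try

    object SparkHomeLookup {
      // Environment variable first, then the spark.home system property.
      def sparkHome: Option[String] =
        Try(sys.env("SPARK_HOME")).toOption
          .orElse(Option(System.getProperty("spark.home")))
    }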

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/94f2fffa/docs/running-on-yarn.md
----------------------------------------------------------------------
diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md
index 13d5fd3..aded643 100644
--- a/docs/running-on-yarn.md
+++ b/docs/running-on-yarn.md
@@ -12,7 +12,7 @@ was added to Spark in version 0.6.0, and improved in 0.7.0 and 0.8.0.
 We need a consolidated Spark JAR (which bundles all the required dependencies) to run Spark jobs on a YARN cluster.
 This can be built by setting the Hadoop version and `SPARK_YARN` environment variable, as follows:
 
-    SPARK_HADOOP_VERSION=2.0.5-alpha SPARK_YARN=true ./sbt assembly
+    SPARK_HADOOP_VERSION=2.0.5-alpha SPARK_YARN=true sbt assembly
 
 The assembled JAR will be something like this:
 `./assembly/target/scala-{{site.SCALA_VERSION}}/spark-assembly_{{site.SPARK_VERSION}}-hadoop2.0.5.jar`.
@@ -72,7 +72,7 @@ The command to launch the YARN Client is as follows:
 For example:
 
     # Build the Spark assembly JAR and the Spark examples JAR
-    $ SPARK_HADOOP_VERSION=2.0.5-alpha SPARK_YARN=true ./sbt assembly
+    $ SPARK_HADOOP_VERSION=2.0.5-alpha SPARK_YARN=true sbt assembly
 
     # Configure logging
     $ cp conf/log4j.properties.template conf/log4j.properties

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/94f2fffa/project/SparkBuild.scala
----------------------------------------------------------------------
diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala
index b335b5a..8290e7c 100644
--- a/project/SparkBuild.scala
+++ b/project/SparkBuild.scala
@@ -20,6 +20,7 @@ import sbt.Classpaths.publishTask
 import Keys._
 import sbtassembly.Plugin._
 import AssemblyKeys._
+import scala.util.Properties
 // For Sonatype publishing
 //import com.jsuereth.pgp.sbtplugin.PgpKeys._
 
@@ -68,10 +69,12 @@ object SparkBuild extends Build {
   // A configuration to set an alternative publishLocalConfiguration
   lazy val MavenCompile = config("m2r") extend(Compile)
   lazy val publishLocalBoth = TaskKey[Unit]("publish-local", "publish local for m2 and ivy")
-
+  val sparkHome = System.getProperty("user.dir")
+  System.setProperty("spark.home", sparkHome)
+  System.setProperty("spark.testing", "1")
   // Allows build configuration to be set through environment variables
-  lazy val hadoopVersion = scala.util.Properties.envOrElse("SPARK_HADOOP_VERSION", DEFAULT_HADOOP_VERSION)
-  lazy val isNewHadoop = scala.util.Properties.envOrNone("SPARK_IS_NEW_HADOOP") match {
+  lazy val hadoopVersion = Properties.envOrElse("SPARK_HADOOP_VERSION", DEFAULT_HADOOP_VERSION)
+  lazy val isNewHadoop = Properties.envOrNone("SPARK_IS_NEW_HADOOP") match {
     case None => {
       val isNewHadoopVersion = "2.[2-9]+".r.findFirstIn(hadoopVersion).isDefined
       (isNewHadoopVersion|| DEFAULT_IS_NEW_HADOOP)
@@ -79,7 +82,7 @@ object SparkBuild extends Build {
     case Some(v) => v.toBoolean
   }
 
-  lazy val isYarnEnabled = scala.util.Properties.envOrNone("SPARK_YARN") match {
+  lazy val isYarnEnabled = Properties.envOrNone("SPARK_YARN") match {
     case None => DEFAULT_YARN
     case Some(v) => v.toBoolean
   }
@@ -112,8 +115,9 @@ object SparkBuild extends Build {
 
     // Fork new JVMs for tests and set Java options for those
     fork := true,
+    javaOptions += "-Dspark.home=" + sparkHome,
+    javaOptions += "-Dspark.testing=1",
     javaOptions += "-Xmx3g",
-
     // Show full stack trace and duration in test cases.
     testOptions in Test += Tests.Argument("-oDF"),
 

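Pulling the hunks above together: the build resolves spark.home once, exposes it (together with spark.testing) as system properties for the sbt JVM itself, and passes the same values to the forked test JVMs through javaOptions. A stripped-down sketch, assuming the sbt Build-trait style used by this project; the project id and settings list are illustrative only.

    import sbt._
    import Keys._

    object MiniSparkBuild extends Build {
      // Resolve once; tests read these via spark.home / spark.testing instead of SPARK_HOME.
      val sparkHome = System.getProperty("user.dir")
      System.setProperty("spark.home", sparkHome)
      System.setProperty("spark.testing", "1")

      lazy val root = Project("mini-spark", file("."), settings = Defaults.defaultSettings ++ Seq(
        fork := true,                                   // run tests in a separate JVM
        javaOptions += "-Dspark.home=" + sparkHome,     // so the forked JVM sees the same values
        javaOptions += "-Dspark.testing=1",
        javaOptions += "-Xmx3g"
      ))
    }
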
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/94f2fffa/python/lib/py4j-0.8.1.zip
----------------------------------------------------------------------
diff --git a/python/lib/py4j-0.8.1.zip b/python/lib/py4j-0.8.1.zip
new file mode 100644
index 0000000..3231e31
Binary files /dev/null and b/python/lib/py4j-0.8.1.zip differ

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/94f2fffa/python/lib/py4j0.7.egg
----------------------------------------------------------------------
diff --git a/python/lib/py4j0.7.egg b/python/lib/py4j0.7.egg
deleted file mode 100644
index f8a339d..0000000
Binary files a/python/lib/py4j0.7.egg and /dev/null differ

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/94f2fffa/repl/src/main/scala/org/apache/spark/repl/SparkILoop.scala
----------------------------------------------------------------------
diff --git a/repl/src/main/scala/org/apache/spark/repl/SparkILoop.scala b/repl/src/main/scala/org/apache/spark/repl/SparkILoop.scala
index f108c70..7e54421 100644
--- a/repl/src/main/scala/org/apache/spark/repl/SparkILoop.scala
+++ b/repl/src/main/scala/org/apache/spark/repl/SparkILoop.scala
@@ -942,12 +942,14 @@ class SparkILoop(in0: Option[BufferedReader], protected val out: JPrintWriter,
     val conf = new SparkConf()
       .setMaster(master)
       .setAppName("Spark shell")
-      .setSparkHome(System.getenv("SPARK_HOME"))
       .setJars(jars)
       .set("spark.repl.class.uri", intp.classServer.uri)
     if (execUri != null) {
       conf.set("spark.executor.uri", execUri)
     }
+    if (System.getenv("SPARK_HOME") != null) {
+      conf.setSparkHome(System.getenv("SPARK_HOME"))
+    }
     sparkContext = new SparkContext(conf)
     echo("Created spark context..")
     sparkContext
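
A condensed sketch of the pattern above, with the repl-specific spark.repl.class.uri setting omitted and a hypothetical method name: build the SparkConf first, then set spark.home only when SPARK_HOME is actually defined, so an unset variable no longer ends up in the configuration as null.

    import org.apache.spark.SparkConf

    object ShellConf {
      def make(master: String, jars: Seq[String], execUri: String): SparkConf = {
        val conf = new SparkConf()
          .setMaster(master)
          .setAppName("Spark shell")
          .setJars(jars)
        if (execUri != null) {
          conf.set("spark.executor.uri", execUri)
        }
        // Only configure spark.home when the environment variable is present.
        Option(System.getenv("SPARK_HOME")).foreach(conf.setSparkHome)
        conf
      }
    }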


[09/10] git commit: Merge remote-tracking branch 'apache-github/master' into remove-binaries

Posted by pw...@apache.org.
Merge remote-tracking branch 'apache-github/master' into remove-binaries

Conflicts:
	core/src/test/scala/org/apache/spark/DriverSuite.scala
	docs/python-programming-guide.md


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/604fad9c
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/604fad9c
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/604fad9c

Branch: refs/heads/master
Commit: 604fad9c39763012d97b404941f7ba7137ec2eed
Parents: 9e6f3bd c4d6145
Author: Patrick Wendell <pw...@gmail.com>
Authored: Fri Jan 3 21:29:33 2014 -0800
Committer: Patrick Wendell <pw...@gmail.com>
Committed: Fri Jan 3 21:29:33 2014 -0800

----------------------------------------------------------------------
 .gitignore                                      |    2 +
 README.md                                       |    8 +-
 assembly/pom.xml                                |   12 +-
 assembly/src/main/assembly/assembly.xml         |   11 +-
 bin/compute-classpath.cmd                       |    2 +-
 bin/compute-classpath.sh                        |    2 +-
 bin/pyspark                                     |   70 ++
 bin/pyspark.cmd                                 |   23 +
 bin/pyspark2.cmd                                |   55 +
 bin/run-example                                 |   91 ++
 bin/run-example.cmd                             |   23 +
 bin/run-example2.cmd                            |   61 ++
 bin/slaves.sh                                   |   91 --
 bin/spark-class                                 |  154 +++
 bin/spark-class.cmd                             |   23 +
 bin/spark-class2.cmd                            |   85 ++
 bin/spark-config.sh                             |   36 -
 bin/spark-daemon.sh                             |  183 ----
 bin/spark-daemons.sh                            |   35 -
 bin/spark-shell                                 |  102 ++
 bin/spark-shell.cmd                             |   23 +
 bin/start-all.sh                                |   34 -
 bin/start-master.sh                             |   52 -
 bin/start-slave.sh                              |   35 -
 bin/start-slaves.sh                             |   48 -
 bin/stop-all.sh                                 |   32 -
 bin/stop-master.sh                              |   27 -
 bin/stop-slaves.sh                              |   35 -
 .../mesos/CoarseMesosSchedulerBackend.scala     |    4 +-
 .../cluster/mesos/MesosSchedulerBackend.scala   |    4 +-
 .../apache/spark/ui/UIWorkloadGenerator.scala   |    4 +-
 .../scala/org/apache/spark/DriverSuite.scala    |    2 +-
 data/kmeans_data.txt                            |    6 +
 data/lr_data.txt                                | 1000 ++++++++++++++++++
 data/pagerank_data.txt                          |    6 +
 docs/bagel-programming-guide.md                 |    4 +-
 docs/building-with-maven.md                     |   14 +-
 docs/index.md                                   |   10 +-
 docs/java-programming-guide.md                  |    4 +-
 docs/mllib-guide.md                             |    2 +-
 docs/python-programming-guide.md                |   28 +-
 docs/quick-start.md                             |    8 +-
 docs/running-on-yarn.md                         |   11 +-
 docs/scala-programming-guide.md                 |   14 +-
 docs/spark-debugger.md                          |    2 +-
 docs/spark-standalone.md                        |   20 +-
 docs/streaming-programming-guide.md             |    4 +-
 ec2/spark_ec2.py                                |    2 +-
 .../streaming/examples/JavaKafkaWordCount.java  |    2 +-
 .../streaming/examples/ActorWordCount.scala     |    4 +-
 .../streaming/examples/HdfsWordCount.scala      |    2 +-
 .../streaming/examples/KafkaWordCount.scala     |    2 +-
 .../streaming/examples/MQTTWordCount.scala      |    4 +-
 .../streaming/examples/NetworkWordCount.scala   |    2 +-
 .../examples/StatefulNetworkWordCount.scala     |    2 +-
 .../streaming/examples/ZeroMQWordCount.scala    |    4 +-
 .../clickstream/PageViewGenerator.scala         |    4 +-
 .../examples/clickstream/PageViewStream.scala   |    4 +-
 kmeans_data.txt                                 |    6 -
 lr_data.txt                                     | 1000 ------------------
 make-distribution.sh                            |   11 +-
 new-yarn/pom.xml                                |  161 ---
 .../spark/deploy/yarn/ApplicationMaster.scala   |  428 --------
 .../yarn/ApplicationMasterArguments.scala       |   94 --
 .../org/apache/spark/deploy/yarn/Client.scala   |  523 ---------
 .../spark/deploy/yarn/ClientArguments.scala     |  150 ---
 .../yarn/ClientDistributedCacheManager.scala    |  228 ----
 .../spark/deploy/yarn/WorkerLauncher.scala      |  225 ----
 .../spark/deploy/yarn/WorkerRunnable.scala      |  209 ----
 .../deploy/yarn/YarnAllocationHandler.scala     |  694 ------------
 .../spark/deploy/yarn/YarnSparkHadoopUtil.scala |   43 -
 .../cluster/YarnClientClusterScheduler.scala    |   48 -
 .../cluster/YarnClientSchedulerBackend.scala    |  110 --
 .../cluster/YarnClusterScheduler.scala          |   56 -
 .../ClientDistributedCacheManagerSuite.scala    |  220 ----
 pagerank_data.txt                               |    6 -
 pom.xml                                         |   59 +-
 project/SparkBuild.scala                        |   32 +-
 pyspark                                         |   70 --
 pyspark.cmd                                     |   23 -
 pyspark2.cmd                                    |   55 -
 python/pyspark/java_gateway.py                  |    2 +-
 python/pyspark/shell.py                         |    2 +-
 python/run-tests                                |    2 +-
 repl-bin/src/deb/bin/run                        |    3 +-
 repl/pom.xml                                    |    1 -
 run-example                                     |   91 --
 run-example.cmd                                 |   23 -
 run-example2.cmd                                |   61 --
 sbin/slaves.sh                                  |   91 ++
 sbin/spark-config.sh                            |   36 +
 sbin/spark-daemon.sh                            |  183 ++++
 sbin/spark-daemons.sh                           |   35 +
 sbin/spark-executor                             |   23 +
 sbin/start-all.sh                               |   34 +
 sbin/start-master.sh                            |   52 +
 sbin/start-slave.sh                             |   35 +
 sbin/start-slaves.sh                            |   48 +
 sbin/stop-all.sh                                |   32 +
 sbin/stop-master.sh                             |   27 +
 sbin/stop-slaves.sh                             |   35 +
 spark-class                                     |  154 ---
 spark-class.cmd                                 |   23 -
 spark-class2.cmd                                |   85 --
 spark-executor                                  |   22 -
 spark-shell                                     |  102 --
 spark-shell.cmd                                 |   22 -
 yarn/README.md                                  |   12 +
 yarn/alpha/pom.xml                              |   32 +
 .../spark/deploy/yarn/ApplicationMaster.scala   |  464 ++++++++
 .../org/apache/spark/deploy/yarn/Client.scala   |  509 +++++++++
 .../spark/deploy/yarn/WorkerLauncher.scala      |  250 +++++
 .../spark/deploy/yarn/WorkerRunnable.scala      |  236 +++++
 .../deploy/yarn/YarnAllocationHandler.scala     |  680 ++++++++++++
 .../yarn/ApplicationMasterArguments.scala       |   94 ++
 .../spark/deploy/yarn/ClientArguments.scala     |  150 +++
 .../yarn/ClientDistributedCacheManager.scala    |  228 ++++
 .../spark/deploy/yarn/YarnSparkHadoopUtil.scala |   43 +
 .../cluster/YarnClientClusterScheduler.scala    |   48 +
 .../cluster/YarnClientSchedulerBackend.scala    |  110 ++
 .../cluster/YarnClusterScheduler.scala          |   56 +
 .../ClientDistributedCacheManagerSuite.scala    |  220 ++++
 yarn/pom.xml                                    |   84 +-
 .../spark/deploy/yarn/ApplicationMaster.scala   |  458 --------
 .../yarn/ApplicationMasterArguments.scala       |   94 --
 .../org/apache/spark/deploy/yarn/Client.scala   |  505 ---------
 .../spark/deploy/yarn/ClientArguments.scala     |  147 ---
 .../yarn/ClientDistributedCacheManager.scala    |  228 ----
 .../spark/deploy/yarn/WorkerLauncher.scala      |  247 -----
 .../spark/deploy/yarn/WorkerRunnable.scala      |  235 ----
 .../deploy/yarn/YarnAllocationHandler.scala     |  680 ------------
 .../spark/deploy/yarn/YarnSparkHadoopUtil.scala |   43 -
 .../cluster/YarnClientClusterScheduler.scala    |   48 -
 .../cluster/YarnClientSchedulerBackend.scala    |  110 --
 .../cluster/YarnClusterScheduler.scala          |   59 --
 .../ClientDistributedCacheManagerSuite.scala    |  220 ----
 yarn/stable/pom.xml                             |   32 +
 .../spark/deploy/yarn/ApplicationMaster.scala   |  432 ++++++++
 .../org/apache/spark/deploy/yarn/Client.scala   |  525 +++++++++
 .../spark/deploy/yarn/WorkerLauncher.scala      |  230 ++++
 .../spark/deploy/yarn/WorkerRunnable.scala      |  210 ++++
 .../deploy/yarn/YarnAllocationHandler.scala     |  695 ++++++++++++
 142 files changed, 7803 insertions(+), 8820 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/604fad9c/README.md
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/604fad9c/assembly/pom.xml
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/604fad9c/bin/pyspark
----------------------------------------------------------------------
diff --cc bin/pyspark
index 0000000,d6810f4..f97dfa7
mode 000000,100755..100755
--- a/bin/pyspark
+++ b/bin/pyspark
@@@ -1,0 -1,70 +1,70 @@@
+ #!/usr/bin/env bash
+ 
+ #
+ # Licensed to the Apache Software Foundation (ASF) under one or more
+ # contributor license agreements.  See the NOTICE file distributed with
+ # this work for additional information regarding copyright ownership.
+ # The ASF licenses this file to You under the Apache License, Version 2.0
+ # (the "License"); you may not use this file except in compliance with
+ # the License.  You may obtain a copy of the License at
+ #
+ #    http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ #
+ 
+ # Figure out where the Scala framework is installed
+ FWDIR="$(cd `dirname $0`/..; pwd)"
+ 
+ # Export this as SPARK_HOME
+ export SPARK_HOME="$FWDIR"
+ 
+ SCALA_VERSION=2.10
+ 
+ # Exit if the user hasn't compiled Spark
+ if [ ! -f "$FWDIR/RELEASE" ]; then
+   # Exit if the user hasn't compiled Spark
+   ls "$FWDIR"/assembly/target/scala-$SCALA_VERSION/spark-assembly*hadoop*.jar >& /dev/null
+   if [[ $? != 0 ]]; then
+     echo "Failed to find Spark assembly in $FWDIR/assembly/target" >&2
 -    echo "You need to build Spark with sbt/sbt assembly before running this program" >&2
++    echo "You need to build Spark with sbt assembly before running this program" >&2
+     exit 1
+   fi
+ fi
+ 
+ # Load environment variables from conf/spark-env.sh, if it exists
+ if [ -e "$FWDIR/conf/spark-env.sh" ] ; then
+   . $FWDIR/conf/spark-env.sh
+ fi
+ 
+ # Figure out which Python executable to use
+ if [ -z "$PYSPARK_PYTHON" ] ; then
+   PYSPARK_PYTHON="python"
+ fi
+ export PYSPARK_PYTHON
+ 
+ # Add the PySpark classes to the Python path:
+ export PYTHONPATH=$SPARK_HOME/python/:$PYTHONPATH
+ 
+ # Load the PySpark shell.py script when ./pyspark is used interactively:
+ export OLD_PYTHONSTARTUP=$PYTHONSTARTUP
+ export PYTHONSTARTUP=$FWDIR/python/pyspark/shell.py
+ 
+ if [ -n "$IPYTHON_OPTS" ]; then
+   IPYTHON=1
+ fi
+ 
+ if [[ "$IPYTHON" = "1" ]] ; then
+   # IPython <1.0.0 doesn't honor PYTHONSTARTUP, while 1.0.0+ does. 
+   # Hence we clear PYTHONSTARTUP and use the -c "%run $IPYTHONSTARTUP" command which works on all versions
+   # We also force interactive mode with "-i"
+   IPYTHONSTARTUP=$PYTHONSTARTUP
+   PYTHONSTARTUP=
+   exec ipython "$IPYTHON_OPTS" -i -c "%run $IPYTHONSTARTUP"
+ else
+   exec "$PYSPARK_PYTHON" "$@"
+ fi

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/604fad9c/bin/run-example
----------------------------------------------------------------------
diff --cc bin/run-example
index 0000000,6c5d4a6..dfb4bf7
mode 000000,100755..100755
--- a/bin/run-example
+++ b/bin/run-example
@@@ -1,0 -1,91 +1,91 @@@
+ #!/usr/bin/env bash
+ 
+ #
+ # Licensed to the Apache Software Foundation (ASF) under one or more
+ # contributor license agreements.  See the NOTICE file distributed with
+ # this work for additional information regarding copyright ownership.
+ # The ASF licenses this file to You under the Apache License, Version 2.0
+ # (the "License"); you may not use this file except in compliance with
+ # the License.  You may obtain a copy of the License at
+ #
+ #    http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ #
+ 
+ cygwin=false
+ case "`uname`" in
+     CYGWIN*) cygwin=true;;
+ esac
+ 
+ SCALA_VERSION=2.10
+ 
+ # Figure out where the Scala framework is installed
+ FWDIR="$(cd `dirname $0`/..; pwd)"
+ 
+ # Export this as SPARK_HOME
+ export SPARK_HOME="$FWDIR"
+ 
+ # Load environment variables from conf/spark-env.sh, if it exists
+ if [ -e "$FWDIR/conf/spark-env.sh" ] ; then
+   . $FWDIR/conf/spark-env.sh
+ fi
+ 
+ if [ -z "$1" ]; then
+   echo "Usage: run-example <example-class> [<args>]" >&2
+   exit 1
+ fi
+ 
+ # Figure out the JAR file that our examples were packaged into. This includes a bit of a hack
+ # to avoid the -sources and -doc packages that are built by publish-local.
+ EXAMPLES_DIR="$FWDIR"/examples
+ SPARK_EXAMPLES_JAR=""
+ if [ -e "$EXAMPLES_DIR"/target/scala-$SCALA_VERSION/*assembly*[0-9Tg].jar ]; then
+   # Use the JAR from the SBT build
+   export SPARK_EXAMPLES_JAR=`ls "$EXAMPLES_DIR"/target/scala-$SCALA_VERSION/*assembly*[0-9Tg].jar`
+ fi
+ if [ -e "$EXAMPLES_DIR"/target/spark-examples*[0-9Tg].jar ]; then
+   # Use the JAR from the Maven build
+   # TODO: this also needs to become an assembly!
+   export SPARK_EXAMPLES_JAR=`ls "$EXAMPLES_DIR"/target/spark-examples*[0-9Tg].jar`
+ fi
+ if [[ -z $SPARK_EXAMPLES_JAR ]]; then
+   echo "Failed to find Spark examples assembly in $FWDIR/examples/target" >&2
 -  echo "You need to build Spark with sbt/sbt assembly before running this program" >&2
++  echo "You need to build Spark with sbt assembly before running this program" >&2
+   exit 1
+ fi
+ 
+ # Since the examples JAR ideally shouldn't include spark-core (that dependency should be
+ # "provided"), also add our standard Spark classpath, built using compute-classpath.sh.
+ CLASSPATH=`$FWDIR/bin/compute-classpath.sh`
+ CLASSPATH="$SPARK_EXAMPLES_JAR:$CLASSPATH"
+ 
+ if $cygwin; then
+     CLASSPATH=`cygpath -wp $CLASSPATH`
+     export SPARK_EXAMPLES_JAR=`cygpath -w $SPARK_EXAMPLES_JAR`
+ fi
+ 
+ # Find java binary
+ if [ -n "${JAVA_HOME}" ]; then
+   RUNNER="${JAVA_HOME}/bin/java"
+ else
+   if [ `command -v java` ]; then
+     RUNNER="java"
+   else
+     echo "JAVA_HOME is not set" >&2
+     exit 1
+   fi
+ fi
+ 
+ if [ "$SPARK_PRINT_LAUNCH_COMMAND" == "1" ]; then
+   echo -n "Spark Command: "
+   echo "$RUNNER" -cp "$CLASSPATH" "$@"
+   echo "========================================"
+   echo
+ fi
+ 
+ exec "$RUNNER" -cp "$CLASSPATH" "$@"

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/604fad9c/bin/spark-class
----------------------------------------------------------------------
diff --cc bin/spark-class
index 0000000,c4225a3..49b0bef
mode 000000,100755..100755
--- a/bin/spark-class
+++ b/bin/spark-class
@@@ -1,0 -1,154 +1,154 @@@
+ #!/usr/bin/env bash
+ 
+ #
+ # Licensed to the Apache Software Foundation (ASF) under one or more
+ # contributor license agreements.  See the NOTICE file distributed with
+ # this work for additional information regarding copyright ownership.
+ # The ASF licenses this file to You under the Apache License, Version 2.0
+ # (the "License"); you may not use this file except in compliance with
+ # the License.  You may obtain a copy of the License at
+ #
+ #    http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ #
+ 
+ cygwin=false
+ case "`uname`" in
+     CYGWIN*) cygwin=true;;
+ esac
+ 
+ SCALA_VERSION=2.10
+ 
+ # Figure out where the Scala framework is installed
+ FWDIR="$(cd `dirname $0`/..; pwd)"
+ 
+ # Export this as SPARK_HOME
+ export SPARK_HOME="$FWDIR"
+ 
+ # Load environment variables from conf/spark-env.sh, if it exists
+ if [ -e "$FWDIR/conf/spark-env.sh" ] ; then
+   . $FWDIR/conf/spark-env.sh
+ fi
+ 
+ if [ -z "$1" ]; then
+   echo "Usage: spark-class <class> [<args>]" >&2
+   exit 1
+ fi
+ 
+ # If this is a standalone cluster daemon, reset SPARK_JAVA_OPTS and SPARK_MEM to reasonable
+ # values for that; it doesn't need a lot
+ if [ "$1" = "org.apache.spark.deploy.master.Master" -o "$1" = "org.apache.spark.deploy.worker.Worker" ]; then
+   SPARK_MEM=${SPARK_DAEMON_MEMORY:-512m}
+   SPARK_DAEMON_JAVA_OPTS="$SPARK_DAEMON_JAVA_OPTS -Dspark.akka.logLifecycleEvents=true"
+   # Do not overwrite SPARK_JAVA_OPTS environment variable in this script
+   OUR_JAVA_OPTS="$SPARK_DAEMON_JAVA_OPTS"   # Empty by default
+ else
+   OUR_JAVA_OPTS="$SPARK_JAVA_OPTS"
+ fi
+ 
+ 
+ # Add java opts for master, worker, executor. The opts maybe null
+ case "$1" in
+   'org.apache.spark.deploy.master.Master')
+     OUR_JAVA_OPTS="$OUR_JAVA_OPTS $SPARK_MASTER_OPTS"
+     ;;
+   'org.apache.spark.deploy.worker.Worker')
+     OUR_JAVA_OPTS="$OUR_JAVA_OPTS $SPARK_WORKER_OPTS"
+     ;;
+   'org.apache.spark.executor.CoarseGrainedExecutorBackend')
+     OUR_JAVA_OPTS="$OUR_JAVA_OPTS $SPARK_EXECUTOR_OPTS"
+     ;;
+   'org.apache.spark.executor.MesosExecutorBackend')
+     OUR_JAVA_OPTS="$OUR_JAVA_OPTS $SPARK_EXECUTOR_OPTS"
+     ;;
+   'org.apache.spark.repl.Main')
+     OUR_JAVA_OPTS="$OUR_JAVA_OPTS $SPARK_REPL_OPTS"
+     ;;
+ esac
+ 
+ # Find the java binary
+ if [ -n "${JAVA_HOME}" ]; then
+   RUNNER="${JAVA_HOME}/bin/java"
+ else
+   if [ `command -v java` ]; then
+     RUNNER="java"
+   else
+     echo "JAVA_HOME is not set" >&2
+     exit 1
+   fi
+ fi
+ 
+ # Set SPARK_MEM if it isn't already set since we also use it for this process
+ SPARK_MEM=${SPARK_MEM:-512m}
+ export SPARK_MEM
+ 
+ # Set JAVA_OPTS to be able to load native libraries and to set heap size
+ JAVA_OPTS="$OUR_JAVA_OPTS"
+ JAVA_OPTS="$JAVA_OPTS -Djava.library.path=$SPARK_LIBRARY_PATH"
+ JAVA_OPTS="$JAVA_OPTS -Xms$SPARK_MEM -Xmx$SPARK_MEM"
+ # Load extra JAVA_OPTS from conf/java-opts, if it exists
+ if [ -e "$FWDIR/conf/java-opts" ] ; then
+   JAVA_OPTS="$JAVA_OPTS `cat $FWDIR/conf/java-opts`"
+ fi
+ export JAVA_OPTS
+ # Attention: when changing the way the JAVA_OPTS are assembled, the change must be reflected in ExecutorRunner.scala!
+ 
+ if [ ! -f "$FWDIR/RELEASE" ]; then
+   # Exit if the user hasn't compiled Spark
+   num_jars=$(ls "$FWDIR"/assembly/target/scala-$SCALA_VERSION/ | grep "spark-assembly.*hadoop.*.jar" | wc -l)
+   jars_list=$(ls "$FWDIR"/assembly/target/scala-$SCALA_VERSION/ | grep "spark-assembly.*hadoop.*.jar")
+   if [ "$num_jars" -eq "0" ]; then
+     echo "Failed to find Spark assembly in $FWDIR/assembly/target/scala-$SCALA_VERSION/" >&2
 -    echo "You need to build Spark with 'sbt/sbt assembly' before running this program." >&2
++    echo "You need to build Spark with 'sbt assembly' before running this program." >&2
+     exit 1
+   fi
+   if [ "$num_jars" -gt "1" ]; then
+     echo "Found multiple Spark assembly jars in $FWDIR/assembly/target/scala-$SCALA_VERSION:" >&2
+     echo "$jars_list"
+     echo "Please remove all but one jar."
+     exit 1
+   fi
+ fi
+ 
+ TOOLS_DIR="$FWDIR"/tools
+ SPARK_TOOLS_JAR=""
+ if [ -e "$TOOLS_DIR"/target/scala-$SCALA_VERSION/*assembly*[0-9Tg].jar ]; then
+   # Use the JAR from the SBT build
+   export SPARK_TOOLS_JAR=`ls "$TOOLS_DIR"/target/scala-$SCALA_VERSION/*assembly*[0-9Tg].jar`
+ fi
+ if [ -e "$TOOLS_DIR"/target/spark-tools*[0-9Tg].jar ]; then
+   # Use the JAR from the Maven build
+   # TODO: this also needs to become an assembly!
+   export SPARK_TOOLS_JAR=`ls "$TOOLS_DIR"/target/spark-tools*[0-9Tg].jar`
+ fi
+ 
+ # Compute classpath using external script
+ CLASSPATH=`$FWDIR/bin/compute-classpath.sh`
+ 
+ if [ "$1" == "org.apache.spark.tools.JavaAPICompletenessChecker" ]; then
+   CLASSPATH="$CLASSPATH:$SPARK_TOOLS_JAR"
+ fi
+ 
+ if $cygwin; then
+   CLASSPATH=`cygpath -wp $CLASSPATH`
+   if [ "$1" == "org.apache.spark.tools.JavaAPICompletenessChecker" ]; then
+     export SPARK_TOOLS_JAR=`cygpath -w $SPARK_TOOLS_JAR`
+   fi
+ fi
+ export CLASSPATH
+ 
+ if [ "$SPARK_PRINT_LAUNCH_COMMAND" == "1" ]; then
+   echo -n "Spark Command: "
+   echo "$RUNNER" -cp "$CLASSPATH" $JAVA_OPTS "$@"
+   echo "========================================"
+   echo
+ fi
+ 
+ exec "$RUNNER" -cp "$CLASSPATH" $JAVA_OPTS "$@"
+ 
+ 

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/604fad9c/core/src/test/scala/org/apache/spark/DriverSuite.scala
----------------------------------------------------------------------
diff --cc core/src/test/scala/org/apache/spark/DriverSuite.scala
index 7e1e55f,605588f..fb89537
--- a/core/src/test/scala/org/apache/spark/DriverSuite.scala
+++ b/core/src/test/scala/org/apache/spark/DriverSuite.scala
@@@ -35,10 -35,8 +35,10 @@@ class DriverSuite extends FunSuite wit
      val masters = Table(("master"), ("local"), ("local-cluster[2,1,512]"))
      forAll(masters) { (master: String) =>
        failAfter(60 seconds) {
 -        Utils.execute(Seq("./bin/spark-class", "org.apache.spark.DriverWithoutCleanup", master),
 -          new File(System.getenv("SPARK_HOME")))
 +        Utils.executeAndGetOutput(
-           Seq("./spark-class", "org.apache.spark.DriverWithoutCleanup", master),
++          Seq("./bin/spark-class", "org.apache.spark.DriverWithoutCleanup", master),
 +          new File(sparkHome), 
 +          Map("SPARK_TESTING" -> "1", "SPARK_HOME" -> sparkHome))
        }
      }
    }

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/604fad9c/docs/index.md
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/604fad9c/docs/python-programming-guide.md
----------------------------------------------------------------------
diff --cc docs/python-programming-guide.md
index 45a6250,dc187b3..5d48cb6
--- a/docs/python-programming-guide.md
+++ b/docs/python-programming-guide.md
@@@ -66,11 -66,11 +66,11 @@@ The script automatically adds the `bin/
  
  # Interactive Use
  
- The `pyspark` script launches a Python interpreter that is configured to run PySpark applications. To use `pyspark` interactively, first build Spark, then launch it directly from the command line without any options:
+ The `bin/pyspark` script launches a Python interpreter that is configured to run PySpark applications. To use `pyspark` interactively, first build Spark, then launch it directly from the command line without any options:
  
  {% highlight bash %}
 -$ sbt/sbt assembly
 +$ sbt assembly
- $ ./pyspark
+ $ ./bin/pyspark
  {% endhighlight %}
  
  The Python shell can be used explore data interactively and is a simple way to learn the API:

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/604fad9c/docs/quick-start.md
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/604fad9c/docs/running-on-yarn.md
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/604fad9c/docs/scala-programming-guide.md
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/604fad9c/make-distribution.sh
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/604fad9c/project/SparkBuild.scala
----------------------------------------------------------------------


[03/10] git commit: ignoring tests for now; contrary to what I assumed, these tests make sense given what they are testing.

Posted by pw...@apache.org.
ignoring tests for now; contrary to what I assumed, these tests make sense given what they are testing.


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/436f3d28
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/436f3d28
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/436f3d28

Branch: refs/heads/master
Commit: 436f3d28560bed9f428efce6f7c4caf44111c60e
Parents: 6be4c11
Author: Prashant Sharma <pr...@imaginea.com>
Authored: Thu Jan 2 16:08:35 2014 +0530
Committer: Prashant Sharma <pr...@imaginea.com>
Committed: Thu Jan 2 16:08:35 2014 +0530

----------------------------------------------------------------------
 core/src/test/resources/uncommons-maths-1.2.2.jar  | Bin 49019 -> 0 bytes
 .../scala/org/apache/spark/FileServerSuite.scala   |   6 +++---
 2 files changed, 3 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/436f3d28/core/src/test/resources/uncommons-maths-1.2.2.jar
----------------------------------------------------------------------
diff --git a/core/src/test/resources/uncommons-maths-1.2.2.jar b/core/src/test/resources/uncommons-maths-1.2.2.jar
deleted file mode 100644
index e126001..0000000
Binary files a/core/src/test/resources/uncommons-maths-1.2.2.jar and /dev/null differ

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/436f3d28/core/src/test/scala/org/apache/spark/FileServerSuite.scala
----------------------------------------------------------------------
diff --git a/core/src/test/scala/org/apache/spark/FileServerSuite.scala b/core/src/test/scala/org/apache/spark/FileServerSuite.scala
index c210dd5..063b5fb 100644
--- a/core/src/test/scala/org/apache/spark/FileServerSuite.scala
+++ b/core/src/test/scala/org/apache/spark/FileServerSuite.scala
@@ -75,7 +75,7 @@ class FileServerSuite extends FunSuite with LocalSparkContext {
     assert(result.toSet === Set((1,200), (2,300), (3,500)))
   }
 
-  test ("Dynamically adding JARS locally") {
+  ignore ("Dynamically adding JARS locally") {
     sc = new SparkContext("local[4]", "test")
     val sampleJarFile = getClass.getClassLoader.getResource("uncommons-maths-1.2.2.jar").getFile()
     sc.addJar(sampleJarFile)
@@ -105,7 +105,7 @@ class FileServerSuite extends FunSuite with LocalSparkContext {
     assert(result.toSet === Set((1,200), (2,300), (3,500)))
   }
 
-  test ("Dynamically adding JARS on a standalone cluster") {
+  ignore ("Dynamically adding JARS on a standalone cluster") {
     sc = new SparkContext("local-cluster[1,1,512]", "test")
     val sampleJarFile = getClass.getClassLoader.getResource("uncommons-maths-1.2.2.jar").getFile()
     sc.addJar(sampleJarFile)
@@ -121,7 +121,7 @@ class FileServerSuite extends FunSuite with LocalSparkContext {
     assert(result.toSet === Set((1,2), (2,7), (3,121)))
   }
 
-  test ("Dynamically adding JARS on a standalone cluster using local: URL") {
+  ignore ("Dynamically adding JARS on a standalone cluster using local: URL") {
     sc = new SparkContext("local-cluster[1,1,512]", "test")
     val sampleJarFile = getClass.getClassLoader.getResource("uncommons-maths-1.2.2.jar").getFile()
     sc.addJar(sampleJarFile.replace("file", "local"))
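
For context on the change above: in ScalaTest's FunSuite, swapping test(...) for ignore(...) keeps the test body compiled but reports it as ignored instead of running it, which lets these jar-dependent cases stay in the tree while the binary jar is removed. A tiny illustrative suite; the names are made up.

    import org.scalatest.FunSuite

    class IgnoreExampleSuite extends FunSuite {
      test("this case runs") {
        assert(1 + 1 === 2)
      }

      ignore("this case is compiled but skipped") {
        assert(sys.env("SPARK_HOME").nonEmpty)
      }
    }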


[04/10] git commit: Removed a repeated test and changed tests to not use uncommons jar

Posted by pw...@apache.org.
Removed a repeated test and changed tests to not use uncommons jar


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/08ec10de
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/08ec10de
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/08ec10de

Branch: refs/heads/master
Commit: 08ec10de1767ca543047b79c40ab50a04ce5df2f
Parents: 436f3d2
Author: Prashant Sharma <pr...@imaginea.com>
Authored: Thu Jan 2 17:31:33 2014 +0530
Committer: Prashant Sharma <pr...@imaginea.com>
Committed: Thu Jan 2 17:32:11 2014 +0530

----------------------------------------------------------------------
 .../org/apache/spark/FileServerSuite.scala      | 94 +++++++++++---------
 1 file changed, 50 insertions(+), 44 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/08ec10de/core/src/test/scala/org/apache/spark/FileServerSuite.scala
----------------------------------------------------------------------
diff --git a/core/src/test/scala/org/apache/spark/FileServerSuite.scala b/core/src/test/scala/org/apache/spark/FileServerSuite.scala
index 063b5fb..a15c375 100644
--- a/core/src/test/scala/org/apache/spark/FileServerSuite.scala
+++ b/core/src/test/scala/org/apache/spark/FileServerSuite.scala
@@ -17,15 +17,46 @@
 
 package org.apache.spark
 
+import java.io._
+import java.util.jar.{JarEntry, JarOutputStream}
+
+import SparkContext._
 import com.google.common.io.Files
 import org.scalatest.FunSuite
-import java.io.{File, PrintWriter, FileReader, BufferedReader}
-import SparkContext._
 
 class FileServerSuite extends FunSuite with LocalSparkContext {
 
   @transient var tmpFile: File = _
-  @transient var testJarFile: File = _
+  @transient var testJarFile: String = _
+
+
+  override def beforeAll() {
+    super.beforeAll()
+    val buffer = new Array[Byte](10240)
+    val tmpdir = new File(Files.createTempDir(), "test")
+    tmpdir.mkdir()
+    val tmpJarEntry = new File(tmpdir, "FileServerSuite2.txt")
+    val pw = new PrintWriter(tmpJarEntry)
+    pw.println("test String in the file named FileServerSuite2.txt")
+    pw.close()
+    // The ugliest code possible, was translated from java.
+    val tmpFile2 = new File(tmpdir, "test.jar")
+    val stream = new FileOutputStream(tmpFile2)
+    val jar = new JarOutputStream(stream, new java.util.jar.Manifest())
+    val jarAdd = new JarEntry(tmpJarEntry.getName)
+    jarAdd.setTime(tmpJarEntry.lastModified)
+    jar.putNextEntry(jarAdd)
+    val in = new FileInputStream(tmpJarEntry)
+    var nRead = 0
+      while (nRead <= 0) {
+      nRead = in.read(buffer, 0, buffer.length)
+      jar.write(buffer, 0, nRead)
+    }
+    in.close()
+    jar.close()
+    stream.close()
+    testJarFile = tmpFile2.getAbsolutePath
+  }
 
   override def beforeEach() {
     super.beforeEach()
@@ -75,20 +106,15 @@ class FileServerSuite extends FunSuite with LocalSparkContext {
     assert(result.toSet === Set((1,200), (2,300), (3,500)))
   }
 
-  ignore ("Dynamically adding JARS locally") {
+  test ("Dynamically adding JARS locally") {
     sc = new SparkContext("local[4]", "test")
-    val sampleJarFile = getClass.getClassLoader.getResource("uncommons-maths-1.2.2.jar").getFile()
-    sc.addJar(sampleJarFile)
-    val testData = Array((1,1), (1,1), (2,1), (3,5), (2,3), (3,0))
-    val result = sc.parallelize(testData).reduceByKey { (x,y) =>
-      val fac = Thread.currentThread.getContextClassLoader()
-                                    .loadClass("org.uncommons.maths.Maths")
-                                    .getDeclaredMethod("factorial", classOf[Int])
-      val a = fac.invoke(null, x.asInstanceOf[java.lang.Integer]).asInstanceOf[Long].toInt
-      val b = fac.invoke(null, y.asInstanceOf[java.lang.Integer]).asInstanceOf[Long].toInt
-      a + b
-    }.collect()
-    assert(result.toSet === Set((1,2), (2,7), (3,121)))
+    sc.addJar(testJarFile)
+    val testData = Array((1, 1))
+    sc.parallelize(testData).foreach { (x) =>
+      if (Thread.currentThread.getContextClassLoader.getResource("FileServerSuite2.txt") == null) {
+        throw new SparkException("jar not added")
+      }
+    }
   }
 
   test("Distributing files on a standalone cluster") {
@@ -105,35 +131,15 @@ class FileServerSuite extends FunSuite with LocalSparkContext {
     assert(result.toSet === Set((1,200), (2,300), (3,500)))
   }
 
-  ignore ("Dynamically adding JARS on a standalone cluster") {
+  test ("Dynamically adding JARS on a standalone cluster") {
     sc = new SparkContext("local-cluster[1,1,512]", "test")
-    val sampleJarFile = getClass.getClassLoader.getResource("uncommons-maths-1.2.2.jar").getFile()
-    sc.addJar(sampleJarFile)
-    val testData = Array((1,1), (1,1), (2,1), (3,5), (2,3), (3,0))
-    val result = sc.parallelize(testData).reduceByKey { (x,y) =>
-      val fac = Thread.currentThread.getContextClassLoader()
-                                    .loadClass("org.uncommons.maths.Maths")
-                                    .getDeclaredMethod("factorial", classOf[Int])
-      val a = fac.invoke(null, x.asInstanceOf[java.lang.Integer]).asInstanceOf[Long].toInt
-      val b = fac.invoke(null, y.asInstanceOf[java.lang.Integer]).asInstanceOf[Long].toInt
-      a + b
-    }.collect()
-    assert(result.toSet === Set((1,2), (2,7), (3,121)))
+    sc.addJar(testJarFile)
+    val testData = Array((1,1))
+    sc.parallelize(testData).foreach { (x) =>
+      if (Thread.currentThread.getContextClassLoader.getResource("FileServerSuite2.txt") == null) {
+        throw new SparkException("jar not added")
+      }
+    }
   }
 
-  ignore ("Dynamically adding JARS on a standalone cluster using local: URL") {
-    sc = new SparkContext("local-cluster[1,1,512]", "test")
-    val sampleJarFile = getClass.getClassLoader.getResource("uncommons-maths-1.2.2.jar").getFile()
-    sc.addJar(sampleJarFile.replace("file", "local"))
-    val testData = Array((1,1), (1,1), (2,1), (3,5), (2,3), (3,0))
-    val result = sc.parallelize(testData).reduceByKey { (x,y) =>
-      val fac = Thread.currentThread.getContextClassLoader()
-                                    .loadClass("org.uncommons.maths.Maths")
-                                    .getDeclaredMethod("factorial", classOf[Int])
-      val a = fac.invoke(null, x.asInstanceOf[java.lang.Integer]).asInstanceOf[Long].toInt
-      val b = fac.invoke(null, y.asInstanceOf[java.lang.Integer]).asInstanceOf[Long].toInt
-      a + b
-    }.collect()
-    assert(result.toSet === Set((1,2), (2,7), (3,121)))
-  }
 }
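
The beforeAll block added above assembles the small test jar by hand. Below is a slightly tidier, hypothetical version of that step (assuming the Guava Files helper the suite already imports), mainly to make the copy loop's termination condition explicit; it is a sketch, not the committed code.

    import java.io.{File, FileInputStream, FileOutputStream, PrintWriter}
    import java.util.jar.{JarEntry, JarOutputStream, Manifest}

    import com.google.common.io.Files

    object TestJarBuilder {
      def buildTestJar(): String = {
        val dir = new File(Files.createTempDir(), "test")
        dir.mkdirs()

        // The single entry the tests later look up via the context class loader.
        val entryFile = new File(dir, "FileServerSuite2.txt")
        val pw = new PrintWriter(entryFile)
        pw.println("test String in the file named FileServerSuite2.txt")
        pw.close()

        val jarFile = new File(dir, "test.jar")
        val jar = new JarOutputStream(new FileOutputStream(jarFile), new Manifest())
        jar.putNextEntry(new JarEntry(entryFile.getName))

        val in = new FileInputStream(entryFile)
        val buffer = new Array[Byte](10240)
        var nRead = in.read(buffer)
        while (nRead != -1) {            // copy until end of stream
          jar.write(buffer, 0, nRead)
          nRead = in.read(buffer)
        }
        in.close()
        jar.close()
        jarFile.getAbsolutePath
      }
    }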