You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@toree.apache.org by lr...@apache.org on 2016/01/11 22:02:00 UTC

[18/50] [abbrv] incubator-toree git commit: Switching to use spark-submit and assembly to run kernel

Switching to use spark-submit and assembly to run kernel


Project: http://git-wip-us.apache.org/repos/asf/incubator-toree/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-toree/commit/ef77e3f2
Tree: http://git-wip-us.apache.org/repos/asf/incubator-toree/tree/ef77e3f2
Diff: http://git-wip-us.apache.org/repos/asf/incubator-toree/diff/ef77e3f2

Branch: refs/heads/master
Commit: ef77e3f2eb6102df29f4333ee937ef6aebffe6c0
Parents: 6f46c20
Author: Gino Bustelo <pa...@us.ibm.com>
Authored: Wed Nov 11 17:28:08 2015 -0600
Committer: Gino Bustelo <pa...@us.ibm.com>
Committed: Tue Nov 24 08:49:50 2015 -0600

----------------------------------------------------------------------
 .gitignore           |  1 +
 Makefile             | 22 ++++++++++++++--------
 Vagrantfile          |  5 +++--
 etc/bin/spark-kernel | 36 ++++++++++++++++++++++++++++++++++++
 kernel-api/build.sbt | 42 +++++++++++++++++++++---------------------
 project/Build.scala  |  4 +++-
 project/Common.scala | 29 ++++++++++++++++++++++++++---
 project/plugins.sbt  |  3 +++
 protocol/build.sbt   |  4 +++-
 9 files changed, 110 insertions(+), 36 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-toree/blob/ef77e3f2/.gitignore
----------------------------------------------------------------------
diff --git a/.gitignore b/.gitignore
index 310328c..12db42c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -11,6 +11,7 @@ scratch/
 **/*ivy.xml
 test-output/
 out/
+dist/
 
 .ensime
 .ensime_cache/

http://git-wip-us.apache.org/repos/asf/incubator-toree/blob/ef77e3f2/Makefile
----------------------------------------------------------------------
diff --git a/Makefile b/Makefile
index e41dbcd..e48d473 100644
--- a/Makefile
+++ b/Makefile
@@ -24,13 +24,11 @@ IOPUB_PORT?=48002
 CONTROL_PORT?=48003
 HB_PORT?=48004
 IP?=0.0.0.0
+VERSION?=0.1.5-SNAPSHOT
 
 clean:
 	vagrant ssh -c "cd /src/spark-kernel/ && sbt clean"
-
-kernel/target/pack/bin/sparkkernel: vagrantup ${shell find ./*/src/main/**/*}
-	vagrant ssh -c "cd /src/spark-kernel/ && sbt compile && sbt pack"
-	vagrant ssh -c "cd /src/spark-kernel/kernel/target/pack && make install"
+	@$(RM) -r dist
 
 build-image: IMAGE_NAME?cloudet/spark-kernel
 build-image: CACHE?=""
@@ -58,10 +56,18 @@ run-image: build-image
 vagrantup:
 	vagrant up
 
-build: kernel/target/pack/bin/sparkkernel
+kernel/target/scala-2.10/kernel-assembly-$(VERSION).jar: ${shell find ./*/src/main/**/*}
+	vagrant ssh -c "cd /src/spark-kernel/ && sbt kernel/assembly"
+
+build: kernel/target/scala-2.10/kernel-assembly-$(VERSION).jar
 
-dev: build
+dev: dist
 	vagrant ssh -c "cd ~ && ipython notebook --ip=* --no-browser"
 
-test: build
-	vagrant ssh -c "cd /src/spark-kernel/ && sbt test"
\ No newline at end of file
+test:
+	vagrant ssh -c "cd /src/spark-kernel/ && sbt compile test"
+
+dist: build
+	@mkdir -p dist/spark-kernel/bin dist/spark-kernel/lib
+	@cp -r etc/bin/* dist/spark-kernel/bin/.
+	@cp kernel/target/scala-2.10/kernel-assembly-$(VERSION).jar dist/spark-kernel/lib/.
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-toree/blob/ef77e3f2/Vagrantfile
----------------------------------------------------------------------
diff --git a/Vagrantfile b/Vagrantfile
index 9ff2207..110befe 100644
--- a/Vagrantfile
+++ b/Vagrantfile
@@ -111,6 +111,7 @@ if ! flag_is_set SPARK; then
   wget http://apache.arvixe.com/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop2.3.tgz && \
   tar xvzf spark-${SPARK_VERSION}-bin-hadoop2.3.tgz && \
   ln -s spark-${SPARK_VERSION}-bin-hadoop2.3 spark && \
+  export SPARK_HOME=/opt/spark && \
   set_flag SPARK
 fi
 
@@ -122,13 +123,13 @@ cat << EOF > /home/vagrant/.ipython/kernels/spark/kernel.json
     "display_name": "Spark 1.5.1 (Scala 2.10.4)",
     "language_info": { "name": "scala" },
     "argv": [
-        "/home/vagrant/local/bin/sparkkernel",
+        "/src/spark-kernel/dist/spark-kernel/bin/spark-kernel",
         "--profile",
         "{connection_file}"
     ],
     "codemirror_mode": "scala",
     "env": {
-        "JVM_OPT": "-Xms1024M -Xmx4096M -Dlog4j.logLevel=trace",
+        "SPARK_OPTS": "--driver-java-options=-Xms1024M --driver-java-options=-Xmx4096M --driver-java-options=-Dlog4j.logLevel=trace",
         "MAX_INTERPRETER_THREADS": "16",
         "SPARK_CONFIGURATION": "spark.cores.max=4",
         "CAPTURE_STANDARD_OUT": "true",

http://git-wip-us.apache.org/repos/asf/incubator-toree/blob/ef77e3f2/etc/bin/spark-kernel
----------------------------------------------------------------------
diff --git a/etc/bin/spark-kernel b/etc/bin/spark-kernel
new file mode 100755
index 0000000..18cce1d
--- /dev/null
+++ b/etc/bin/spark-kernel
@@ -0,0 +1,36 @@
+#!/usr/bin/env bash
+
+#
+# Copyright 2015 IBM Corp.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+PROG_HOME="$(cd "$(dirname "$0")/.." && pwd)"
+
+if [ -z "$SPARK_HOME" ]; then
+  echo "SPARK_HOME must be set to the location of a Spark distribution!"
+  exit 1
+fi
+
+echo "Starting Spark Kernel with SPARK_HOME=$SPARK_HOME"
+# Use the first assembly jar in lib/ so an extra stale jar cannot split the command line
+KERNEL_ASSEMBLY="$(cd "${PROG_HOME}/lib" && ls -1 kernel-assembly-*.jar | head -n 1)"
+
+# disable randomized hash for string in Python 3.3+
+export PYTHONHASHSEED=0
+# SPARK_OPTS is deliberately left unquoted so it word-splits into separate options
+exec "$SPARK_HOME"/bin/spark-submit \
+  ${SPARK_OPTS} \
+  --driver-class-path "$PROG_HOME/lib/${KERNEL_ASSEMBLY}" \
+  --class com.ibm.spark.SparkKernel "$PROG_HOME/lib/${KERNEL_ASSEMBLY}" "$@"

http://git-wip-us.apache.org/repos/asf/incubator-toree/blob/ef77e3f2/kernel-api/build.sbt
----------------------------------------------------------------------
diff --git a/kernel-api/build.sbt b/kernel-api/build.sbt
index ca2ba01..9995e85 100644
--- a/kernel-api/build.sbt
+++ b/kernel-api/build.sbt
@@ -43,31 +43,31 @@ libraryDependencies ++= Seq(
 
 // TODO: Mark these as provided and bring them in via the kernel project
 //       so users wanting to implement a magic do not bring in Spark itself
-libraryDependencies ++= Seq(
-  "org.apache.spark" %% "spark-core" % sparkVersion.value excludeAll( // Apache v2
-    ExclusionRule(organization = "org.apache.hadoop"),
-
-    // Exclude netty (org.jboss.netty is for 3.2.2.Final only)
-    ExclusionRule(
-      organization = "org.jboss.netty",
-      name = "netty"
-    )
-  ),
-  "org.apache.spark" %% "spark-streaming" % sparkVersion.value,      // Apache v2
-  "org.apache.spark" %% "spark-sql" % sparkVersion.value,            // Apache v2
-  "org.apache.spark" %% "spark-mllib" % sparkVersion.value,          // Apache v2
-  "org.apache.spark" %% "spark-graphx" % sparkVersion.value,         // Apache v2
-  "org.apache.spark" %% "spark-repl" % sparkVersion.value excludeAll // Apache v2
-    ExclusionRule(organization = "org.apache.hadoop")
-)
+//libraryDependencies ++= Seq(
+//  "org.apache.spark" %% "spark-core" % sparkVersion.value excludeAll( // Apache v2
+//    ExclusionRule(organization = "org.apache.hadoop"),
+//
+//    // Exclude netty (org.jboss.netty is for 3.2.2.Final only)
+//    ExclusionRule(
+//      organization = "org.jboss.netty",
+//      name = "netty"
+//    )
+//  ),
+//  "org.apache.spark" %% "spark-streaming" % sparkVersion.value,      // Apache v2
+//  "org.apache.spark" %% "spark-sql" % sparkVersion.value,            // Apache v2
+//  "org.apache.spark" %% "spark-mllib" % sparkVersion.value,          // Apache v2
+//  "org.apache.spark" %% "spark-graphx" % sparkVersion.value,         // Apache v2
+//  "org.apache.spark" %% "spark-repl" % sparkVersion.value excludeAll // Apache v2
+//    ExclusionRule(organization = "org.apache.hadoop")
+//)
 
 //
 // HADOOP DEPENDENCIES
 //
-libraryDependencies ++= Seq(
-  "org.apache.hadoop" % "hadoop-client" % hadoopVersion.value excludeAll
-    ExclusionRule(organization = "javax.servlet")
-)
+//libraryDependencies ++= Seq(
+//  "org.apache.hadoop" % "hadoop-client" % hadoopVersion.value excludeAll
+//    ExclusionRule(organization = "javax.servlet")
+//)
 
 //
 // EXECUTION DEPENDENCIES

http://git-wip-us.apache.org/repos/asf/incubator-toree/blob/ef77e3f2/project/Build.scala
----------------------------------------------------------------------
diff --git a/project/Build.scala b/project/Build.scala
index fc8dc19..b0d867e 100644
--- a/project/Build.scala
+++ b/project/Build.scala
@@ -27,6 +27,7 @@ import sbtunidoc.Plugin._
 import scoverage.ScoverageSbtPlugin
 import xerial.sbt.Pack._
 import com.typesafe.sbt.SbtGit.{GitKeys => git}
+import sbtassembly.AssemblyKeys._
 
 object Build extends Build with Settings with SubProjects with TestTasks {
   /**
@@ -98,7 +99,8 @@ trait SubProjects extends Settings with TestTasks {
     base = file("kernel"),
     settings = fullSettings ++
       packSettings ++ Seq(
-        packMain := Map("sparkkernel" -> "com.ibm.spark.SparkKernel")
+        packMain := Map("sparkkernel" -> "com.ibm.spark.SparkKernel"),
+        test in assembly := {}
       )
   )) dependsOn(
     macros % "test->test;compile->compile",

http://git-wip-us.apache.org/repos/asf/incubator-toree/blob/ef77e3f2/project/Common.scala
----------------------------------------------------------------------
diff --git a/project/Common.scala b/project/Common.scala
index 73e5395..f170dea 100644
--- a/project/Common.scala
+++ b/project/Common.scala
@@ -38,8 +38,11 @@ object Common {
   private val buildScalaVersion = "2.10.4"
   private val buildSbtVersion   = "0.13.7"
 
+  lazy val sparkVersion = settingKey[String]("The Apache Spark version to use")
+
   // Global dependencies provided to all projects
-  private val buildLibraryDependencies = Seq(
+  private var buildLibraryDependencies = Seq(
+
     // Needed to force consistent typesafe config with play json and spark
     "com.typesafe" % "config" % "1.2.1",
     "org.slf4j" % "slf4j-log4j12" % "1.7.5" % "test",
@@ -49,8 +52,6 @@ object Common {
     "org.mockito" % "mockito-all" % "1.9.5" % "test"   // MIT
   )
 
-  lazy val sparkVersion = settingKey[String]("The Apache Spark version to use")
-
   lazy val hadoopVersion = settingKey[String]("The Apache Hadoop version to use")
 
   // The prefix used for our custom artifact names
@@ -108,6 +109,9 @@ object Common {
       }
     },
 
+
+
+
     scalacOptions in (Compile, doc) ++= Seq(
       // Ignore packages (for Scaladoc) not from our project
       "-skip-packages", Seq(
@@ -162,6 +166,25 @@ object Common {
     compile <<= (compile in Compile) dependsOn (rebuildIvyXml dependsOn deliverLocal)
   ) ++ rebuildIvyXmlSettings // Include our rebuild ivy xml settings
 
+
+  buildLibraryDependencies ++= Seq( "org.apache.spark" %% "spark-core" % "1.5.1"  % "provided" excludeAll( // Apache v2
+    ExclusionRule(organization = "org.apache.hadoop"),
+
+    // Exclude netty (org.jboss.netty is for 3.2.2.Final only)
+    ExclusionRule(
+      organization = "org.jboss.netty",
+      name = "netty"
+    )
+    ),
+    "org.apache.spark" %% "spark-streaming" % "1.5.1" % "provided",      // Apache v2
+    "org.apache.spark" %% "spark-sql" % "1.5.1" % "provided",            // Apache v2
+    "org.apache.spark" %% "spark-mllib" % "1.5.1" % "provided",          // Apache v2
+    "org.apache.spark" %% "spark-graphx" % "1.5.1" % "provided",         // Apache v2
+    "org.apache.spark" %% "spark-repl" % "1.5.1"  % "provided" excludeAll // Apache v2
+      ExclusionRule(organization = "org.apache.hadoop"),
+    "org.apache.hadoop" % "hadoop-client" % "2.3.0" % "provided" excludeAll
+      ExclusionRule(organization = "javax.servlet"))
+
   // ==========================================================================
   // = REBUILD IVY XML SETTINGS BELOW
   // ==========================================================================

http://git-wip-us.apache.org/repos/asf/incubator-toree/blob/ef77e3f2/project/plugins.sbt
----------------------------------------------------------------------
diff --git a/project/plugins.sbt b/project/plugins.sbt
index 4efa32c..491f0ca 100644
--- a/project/plugins.sbt
+++ b/project/plugins.sbt
@@ -33,6 +33,9 @@ addSbtPlugin("net.virtual-void" % "sbt-dependency-graph" % "0.7.4")
 // using `sbt pack` or `sbt pack-archive` to generate a *.tar.gz file
 addSbtPlugin("org.xerial.sbt" % "sbt-pack" % "0.6.1")
 
+// Provides the ability to create an uber-jar
+addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.14.0")
+
 //  Provides the ability to package our project as a docker image
 addSbtPlugin("se.marcuslonnberg" % "sbt-docker" % "0.5.2")
 

http://git-wip-us.apache.org/repos/asf/incubator-toree/blob/ef77e3f2/protocol/build.sbt
----------------------------------------------------------------------
diff --git a/protocol/build.sbt b/protocol/build.sbt
index 36f655a..b8b0d22 100644
--- a/protocol/build.sbt
+++ b/protocol/build.sbt
@@ -22,7 +22,9 @@ resolvers += "Typesafe repository" at "http://repo.typesafe.com/typesafe/release
 // JSON DEPENDENCIES
 //
 libraryDependencies ++= Seq(
-  "com.typesafe.play" %% "play-json" % "2.3.6", // Apache v2
+  "com.typesafe.play" %% "play-json" % "2.3.6" excludeAll( // Apache v2
+      ExclusionRule(organization = "com.fasterxml.jackson.core")
+    ),
   "org.slf4j" % "slf4j-api" % "1.7.5" // MIT
 )