Posted to commits@spark.apache.org by jo...@apache.org on 2016/03/14 19:13:34 UTC

spark git commit: [SPARK-13578][CORE] Modify launch scripts to not use assemblies.

Repository: spark
Updated Branches:
  refs/heads/master 9a87afd7d -> 45f8053be


[SPARK-13578][CORE] Modify launch scripts to not use assemblies.

Instead of looking for a specially-named assembly, the scripts will now
blindly add all jars under the libs directory to the classpath. That libs
directory is currently still the old assembly directory, so things should
keep working the same way as before until we make more packaging changes.
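
As a rough sketch (not part of the patch; the jar name and paths below are
illustrative), the classpath construction moves from pinning one
specially-named jar to handing the JVM a classpath wildcard:

# Old approach: locate the single assembly jar by name.
LAUNCH_CLASSPATH="$ASSEMBLY_DIR/spark-assembly-1.6.0-hadoop2.6.0.jar"

# New approach: let the JVM enumerate every jar in the directory itself.
LAUNCH_CLASSPATH="$SPARK_JARS_DIR/*"
exec java -cp "$LAUNCH_CLASSPATH" org.apache.spark.launcher.Main "$@"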

The only lost feature is the detection of multiple assemblies; I consider
that a minor nicety that only really affects a few developers, so losing it
is probably OK.

Tested locally by running spark-shell; also did some minor Win32 testing
(just made sure spark-shell started).
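
(For anyone repeating that smoke test, it amounts to roughly the following;
the command in the trailing comment is just an example sanity check:)

# Verify the shell comes up with the new wildcard classpath.
./bin/spark-shell --master local[2]
# scala> sc.parallelize(1 to 10).sum   // any small job will do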

Author: Marcelo Vanzin <va...@cloudera.com>

Closes #11591 from vanzin/SPARK-13578.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/45f8053b
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/45f8053b
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/45f8053b

Branch: refs/heads/master
Commit: 45f8053be5c635b50c7b4ef5a0dc75d30f411291
Parents: 9a87afd
Author: Marcelo Vanzin <va...@cloudera.com>
Authored: Mon Mar 14 11:13:26 2016 -0700
Committer: Josh Rosen <jo...@databricks.com>
Committed: Mon Mar 14 11:13:26 2016 -0700

----------------------------------------------------------------------
 bin/spark-class                                 | 29 +++---------
 bin/spark-class2.cmd                            | 18 +++-----
 .../spark/launcher/AbstractCommandBuilder.java  | 48 ++++++++------------
 .../spark/launcher/CommandBuilderUtils.java     |  1 -
 .../SparkSubmitCommandBuilderSuite.java         |  1 -
 5 files changed, 31 insertions(+), 66 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/45f8053b/bin/spark-class
----------------------------------------------------------------------
diff --git a/bin/spark-class b/bin/spark-class
index 5d964ba..e710e38 100755
--- a/bin/spark-class
+++ b/bin/spark-class
@@ -35,42 +35,27 @@ else
   fi
 fi
 
-# Find assembly jar
-SPARK_ASSEMBLY_JAR=
+# Find Spark jars.
+# TODO: change the directory name when Spark jars move from "lib".
 if [ -f "${SPARK_HOME}/RELEASE" ]; then
-  ASSEMBLY_DIR="${SPARK_HOME}/lib"
+  SPARK_JARS_DIR="${SPARK_HOME}/lib"
 else
-  ASSEMBLY_DIR="${SPARK_HOME}/assembly/target/scala-$SPARK_SCALA_VERSION"
+  SPARK_JARS_DIR="${SPARK_HOME}/assembly/target/scala-$SPARK_SCALA_VERSION"
 fi
 
-GREP_OPTIONS=
-num_jars="$(ls -1 "$ASSEMBLY_DIR" | grep "^spark-assembly.*hadoop.*\.jar$" | wc -l)"
-if [ "$num_jars" -eq "0" -a -z "$SPARK_ASSEMBLY_JAR" -a "$SPARK_PREPEND_CLASSES" != "1" ]; then
-  echo "Failed to find Spark assembly in $ASSEMBLY_DIR." 1>&2
+if [ ! -d "$SPARK_JARS_DIR" ]; then
+  echo "Failed to find Spark jars directory ($SPARK_JARS_DIR)." 1>&2
   echo "You need to build Spark before running this program." 1>&2
   exit 1
 fi
-if [ -d "$ASSEMBLY_DIR" ]; then
-  ASSEMBLY_JARS="$(ls -1 "$ASSEMBLY_DIR" | grep "^spark-assembly.*hadoop.*\.jar$" || true)"
-  if [ "$num_jars" -gt "1" ]; then
-    echo "Found multiple Spark assembly jars in $ASSEMBLY_DIR:" 1>&2
-    echo "$ASSEMBLY_JARS" 1>&2
-    echo "Please remove all but one jar." 1>&2
-    exit 1
-  fi
-fi
 
-SPARK_ASSEMBLY_JAR="${ASSEMBLY_DIR}/${ASSEMBLY_JARS}"
-
-LAUNCH_CLASSPATH="$SPARK_ASSEMBLY_JAR"
+LAUNCH_CLASSPATH="$SPARK_JARS_DIR/*"
 
 # Add the launcher build dir to the classpath if requested.
 if [ -n "$SPARK_PREPEND_CLASSES" ]; then
   LAUNCH_CLASSPATH="${SPARK_HOME}/launcher/target/scala-$SPARK_SCALA_VERSION/classes:$LAUNCH_CLASSPATH"
 fi
 
-export _SPARK_ASSEMBLY="$SPARK_ASSEMBLY_JAR"
-
 # For tests
 if [[ -n "$SPARK_TESTING" ]]; then
   unset YARN_CONF_DIR
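
One detail worth calling out (general JVM/shell behavior rather than anything
specific to this patch): the asterisk has to reach java unexpanded for the
classpath wildcard to work, so the value must stay quoted through to the JVM
invocation. With a hypothetical directory holding a.jar and b.jar:

# Quoted: java receives the literal "lib/*" and searches every jar in it.
java -cp "/opt/spark/lib/*" org.example.Main

# Unquoted: the shell globs first, so -cp gets only a.jar and java then
# tries to run b.jar as the main class, which fails.
java -cp /opt/spark/lib/* org.example.Main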

http://git-wip-us.apache.org/repos/asf/spark/blob/45f8053b/bin/spark-class2.cmd
----------------------------------------------------------------------
diff --git a/bin/spark-class2.cmd b/bin/spark-class2.cmd
index c4fadb8..565b87c 100644
--- a/bin/spark-class2.cmd
+++ b/bin/spark-class2.cmd
@@ -28,33 +28,27 @@ if "x%1"=="x" (
   exit /b 1
 )
 
-rem Find assembly jar
-set SPARK_ASSEMBLY_JAR=0
-
+rem Find Spark jars.
+rem TODO: change the directory name when Spark jars move from "lib".
 if exist "%SPARK_HOME%\RELEASE" (
-  set ASSEMBLY_DIR="%SPARK_HOME%\lib"
+  set SPARK_JARS_DIR="%SPARK_HOME%\lib"
 ) else (
-  set ASSEMBLY_DIR="%SPARK_HOME%\assembly\target\scala-%SPARK_SCALA_VERSION%"
+  set SPARK_JARS_DIR="%SPARK_HOME%\assembly\target\scala-%SPARK_SCALA_VERSION%"
 )
 
-for %%d in (%ASSEMBLY_DIR%\spark-assembly*hadoop*.jar) do (
-  set SPARK_ASSEMBLY_JAR=%%d
-)
-if "%SPARK_ASSEMBLY_JAR%"=="0" (
+if not exist "%SPARK_JARS_DIR%"\ (
   echo Failed to find Spark assembly JAR.
   echo You need to build Spark before running this program.
   exit /b 1
 )
 
-set LAUNCH_CLASSPATH=%SPARK_ASSEMBLY_JAR%
+set LAUNCH_CLASSPATH=%SPARK_JARS_DIR%\*
 
 rem Add the launcher build dir to the classpath if requested.
 if not "x%SPARK_PREPEND_CLASSES%"=="x" (
   set LAUNCH_CLASSPATH="%SPARK_HOME%\launcher\target\scala-%SPARK_SCALA_VERSION%\classes;%LAUNCH_CLASSPATH%"
 )
 
-set _SPARK_ASSEMBLY=%SPARK_ASSEMBLY_JAR%
-
 rem Figure out where java is.
 set RUNNER=java
 if not "x%JAVA_HOME%"=="x" set RUNNER=%JAVA_HOME%\bin\java

http://git-wip-us.apache.org/repos/asf/spark/blob/45f8053b/launcher/src/main/java/org/apache/spark/launcher/AbstractCommandBuilder.java
----------------------------------------------------------------------
diff --git a/launcher/src/main/java/org/apache/spark/launcher/AbstractCommandBuilder.java b/launcher/src/main/java/org/apache/spark/launcher/AbstractCommandBuilder.java
index 20387e0..f6c7e07 100644
--- a/launcher/src/main/java/org/apache/spark/launcher/AbstractCommandBuilder.java
+++ b/launcher/src/main/java/org/apache/spark/launcher/AbstractCommandBuilder.java
@@ -19,7 +19,6 @@ package org.apache.spark.launcher;
 
 import java.io.BufferedReader;
 import java.io.File;
-import java.io.FileFilter;
 import java.io.FileInputStream;
 import java.io.InputStreamReader;
 import java.io.IOException;
@@ -172,21 +171,13 @@ abstract class AbstractCommandBuilder {
       addToClassPath(cp, String.format("%s/core/target/jars/*", sparkHome));
     }
 
-    // We can't rely on the ENV_SPARK_ASSEMBLY variable to be set. Certain situations, such as
-    // when running unit tests, or user code that embeds Spark and creates a SparkContext
-    // with a local or local-cluster master, will cause this code to be called from an
-    // environment where that env variable is not guaranteed to exist.
-    //
-    // For the testing case, we rely on the test code to set and propagate the test classpath
-    // appropriately.
-    //
-    // For the user code case, we fall back to looking for the Spark assembly under SPARK_HOME.
-    // That duplicates some of the code in the shell scripts that look for the assembly, though.
-    String assembly = getenv(ENV_SPARK_ASSEMBLY);
-    if (assembly == null && !isTesting) {
-      assembly = findAssembly();
+    // Add Spark jars to the classpath. For the testing case, we rely on the test code to set and
+    // propagate the test classpath appropriately. For normal invocation, look for the jars
+    // directory under SPARK_HOME.
+    String jarsDir = findJarsDir(!isTesting);
+    if (jarsDir != null) {
+      addToClassPath(cp, join(File.separator, jarsDir, "*"));
     }
-    addToClassPath(cp, assembly);
 
     // Datanucleus jars must be included on the classpath. Datanucleus jars do not work if only
     // included in the uber jar as plugin.xml metadata is lost. Both sbt and maven will populate
@@ -320,28 +311,25 @@ abstract class AbstractCommandBuilder {
     return props;
   }
 
-  private String findAssembly() {
+  private String findJarsDir(boolean failIfNotFound) {
+    // TODO: change to the correct directory once the assembly build is changed.
     String sparkHome = getSparkHome();
     File libdir;
     if (new File(sparkHome, "RELEASE").isFile()) {
       libdir = new File(sparkHome, "lib");
-      checkState(libdir.isDirectory(), "Library directory '%s' does not exist.",
-          libdir.getAbsolutePath());
+      checkState(!failIfNotFound || libdir.isDirectory(),
+        "Library directory '%s' does not exist.",
+        libdir.getAbsolutePath());
     } else {
       libdir = new File(sparkHome, String.format("assembly/target/scala-%s", getScalaVersion()));
-    }
-
-    final Pattern re = Pattern.compile("spark-assembly.*hadoop.*\\.jar");
-    FileFilter filter = new FileFilter() {
-      @Override
-      public boolean accept(File file) {
-        return file.isFile() && re.matcher(file.getName()).matches();
+      if (!libdir.isDirectory()) {
+        checkState(!failIfNotFound,
+          "Library directory '%s' does not exist; make sure Spark is built.",
+          libdir.getAbsolutePath());
+        libdir = null;
       }
-    };
-    File[] assemblies = libdir.listFiles(filter);
-    checkState(assemblies != null && assemblies.length > 0, "No assemblies found in '%s'.", libdir);
-    checkState(assemblies.length == 1, "Multiple assemblies found in '%s'.", libdir);
-    return assemblies[0].getAbsolutePath();
+    }
+    return libdir != null ? libdir.getAbsolutePath() : null;
   }
 
   private String getConfDir() {
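
Net effect of the launcher changes above: the _SPARK_ASSEMBLY handshake
between the scripts and the library is gone, and the library now locates the
jars directory on its own from SPARK_HOME (leniently when testing, strictly
otherwise). A rough illustration of the embedded-Spark case the removed
comment described, with hypothetical paths and class names:

# No _SPARK_ASSEMBLY variable to export any more; the launcher library
# resolves the jars directory from SPARK_HOME by itself.
SPARK_HOME=/opt/spark java -cp "myapp.jar:/opt/spark/lib/*" com.example.MyApp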

http://git-wip-us.apache.org/repos/asf/spark/blob/45f8053b/launcher/src/main/java/org/apache/spark/launcher/CommandBuilderUtils.java
----------------------------------------------------------------------
diff --git a/launcher/src/main/java/org/apache/spark/launcher/CommandBuilderUtils.java b/launcher/src/main/java/org/apache/spark/launcher/CommandBuilderUtils.java
index 7942d73..37afafe 100644
--- a/launcher/src/main/java/org/apache/spark/launcher/CommandBuilderUtils.java
+++ b/launcher/src/main/java/org/apache/spark/launcher/CommandBuilderUtils.java
@@ -30,7 +30,6 @@ class CommandBuilderUtils {
   static final String DEFAULT_MEM = "1g";
   static final String DEFAULT_PROPERTIES_FILE = "spark-defaults.conf";
   static final String ENV_SPARK_HOME = "SPARK_HOME";
-  static final String ENV_SPARK_ASSEMBLY = "_SPARK_ASSEMBLY";
 
   /** The set of known JVM vendors. */
   static enum JavaVendor {

http://git-wip-us.apache.org/repos/asf/spark/blob/45f8053b/launcher/src/test/java/org/apache/spark/launcher/SparkSubmitCommandBuilderSuite.java
----------------------------------------------------------------------
diff --git a/launcher/src/test/java/org/apache/spark/launcher/SparkSubmitCommandBuilderSuite.java b/launcher/src/test/java/org/apache/spark/launcher/SparkSubmitCommandBuilderSuite.java
index a85afb5..00f9671 100644
--- a/launcher/src/test/java/org/apache/spark/launcher/SparkSubmitCommandBuilderSuite.java
+++ b/launcher/src/test/java/org/apache/spark/launcher/SparkSubmitCommandBuilderSuite.java
@@ -282,7 +282,6 @@ public class SparkSubmitCommandBuilderSuite extends BaseSuite {
   private SparkSubmitCommandBuilder newCommandBuilder(List<String> args) {
     SparkSubmitCommandBuilder builder = new SparkSubmitCommandBuilder(args);
     builder.childEnv.put(CommandBuilderUtils.ENV_SPARK_HOME, System.getProperty("spark.test.home"));
-    builder.childEnv.put(CommandBuilderUtils.ENV_SPARK_ASSEMBLY, "dummy");
     return builder;
   }
 

