Posted to commits@spark.apache.org by an...@apache.org on 2014/09/03 21:19:54 UTC

git commit: [SPARK-3216] [SPARK-3232] Spark-shell is broken in branch-1.0 / Backport SPARK-3006 into branch-1.0

Repository: spark
Updated Branches:
  refs/heads/branch-1.0 d47581638 -> 8dd7690e2


[SPARK-3216] [SPARK-3232] Spark-shell is broken in branch-1.0 / Backport SPARK-3006 into branch-1.0

Author: Kousuke Saruta <sa...@oss.nttdata.co.jp>
Author: Andrew Or <an...@gmail.com>

Closes #2136 from sarutak/SPARK-3216 and squashes the following commits:

5c20d2d [Kousuke Saruta] Merge branch 'branch-1.0' of git://git.apache.org/spark into SPARK-3216
af0517f [Kousuke Saruta] Modified spark-shell.cmd, backporting SPARK-3006
bbc7221 [Kousuke Saruta] Replace 4-space indentation with 2-space
15cd980 [Andrew Or] Fix spark-shell in branch-1.0


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/8dd7690e
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/8dd7690e
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/8dd7690e

Branch: refs/heads/branch-1.0
Commit: 8dd7690e2b4c3269d2777d3e208903bf596d1509
Parents: d475816
Author: Kousuke Saruta <sa...@oss.nttdata.co.jp>
Authored: Wed Sep 3 12:18:16 2014 -0700
Committer: Andrew Or <an...@gmail.com>
Committed: Wed Sep 3 12:19:13 2014 -0700

----------------------------------------------------------------------
 bin/pyspark                                     | 18 ++++--
 bin/spark-shell                                 | 40 +++++++------
 bin/spark-shell.cmd                             |  2 +-
 bin/utils.sh                                    | 59 ++++++++++++++++++++
 .../spark/deploy/SparkSubmitArguments.scala     |  4 ++
 python/pyspark/java_gateway.py                  |  2 +-
 6 files changed, 103 insertions(+), 22 deletions(-)
----------------------------------------------------------------------
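
As the hunks below show, the fix centers on argument ordering: spark-submit
stops parsing its own options at the first positional (primary resource)
argument, so anything the old scripts placed after the "spark-shell" resource
was handed to the application instead of to spark-submit. A rough sketch of
the ordering change (the --master value is a hypothetical example):

  # Old, broken ordering: user options land after the primary resource
  # "spark-shell", so spark-submit never sees them as submit options.
  ./bin/spark-submit spark-shell --master local[4] --class org.apache.spark.repl.Main

  # New ordering: submit options first, then the primary resource, then
  # application options.
  ./bin/spark-submit --class org.apache.spark.repl.Main --master local[4] spark-shell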


http://git-wip-us.apache.org/repos/asf/spark/blob/8dd7690e/bin/pyspark
----------------------------------------------------------------------
diff --git a/bin/pyspark b/bin/pyspark
index acf3b27..b000406 100755
--- a/bin/pyspark
+++ b/bin/pyspark
@@ -23,12 +23,18 @@ FWDIR="$(cd `dirname $0`/..; pwd)"
 # Export this as SPARK_HOME
 export SPARK_HOME="$FWDIR"
 
+source $FWDIR/bin/utils.sh
+
 SCALA_VERSION=2.10
 
-if [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then
+function usage() {
   echo "Usage: ./bin/pyspark [options]" 1>&2
   $FWDIR/bin/spark-submit --help 2>&1 | grep -v Usage 1>&2
   exit 0
+}
+
+if [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then
+  usage
 fi
 
 # Exit if the user hasn't compiled Spark
@@ -66,10 +72,11 @@ fi
 # Build up arguments list manually to preserve quotes and backslashes.
 # We export Spark submit arguments as an environment variable because shell.py must run as a
 # PYTHONSTARTUP script, which does not take in arguments. This is required for IPython notebooks.
-
+SUBMIT_USAGE_FUNCTION=usage
+gatherSparkSubmitOpts "$@"
 PYSPARK_SUBMIT_ARGS=""
 whitespace="[[:space:]]"
-for i in "$@"; do
+for i in "${SUBMISSION_OPTS[@]}"; do
   if [[ $i =~ \" ]]; then i=$(echo $i | sed 's/\"/\\\"/g'); fi
   if [[ $i =~ $whitespace ]]; then i=\"$i\"; fi
   PYSPARK_SUBMIT_ARGS="$PYSPARK_SUBMIT_ARGS $i"
@@ -80,7 +87,10 @@ export PYSPARK_SUBMIT_ARGS
 if [[ "$1" =~ \.py$ ]]; then
   echo -e "\nWARNING: Running python applications through ./bin/pyspark is deprecated as of Spark 1.0." 1>&2
   echo -e "Use ./bin/spark-submit <python file>\n" 1>&2
-  exec $FWDIR/bin/spark-submit "$@"
+  primary=$1
+  shift
+  gatherSparkSubmitOpts "$@"
+  exec $FWDIR/bin/spark-submit "${SUBMISSION_OPTS[@]}" $primary "${APPLICATION_OPTS[@]}"
 else
   # Only use ipython if no command line arguments were provided [SPARK-1134]
   if [[ "$IPYTHON" = "1" ]]; then

http://git-wip-us.apache.org/repos/asf/spark/blob/8dd7690e/bin/spark-shell
----------------------------------------------------------------------
diff --git a/bin/spark-shell b/bin/spark-shell
index 850e950..86c5211 100755
--- a/bin/spark-shell
+++ b/bin/spark-shell
@@ -31,27 +31,35 @@ set -o posix
 ## Global script variables
 FWDIR="$(cd `dirname $0`/..; pwd)"
 
-if [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then
+function usage() {
   echo "Usage: ./bin/spark-shell [options]"
   $FWDIR/bin/spark-submit --help 2>&1 | grep -v Usage 1>&2
   exit 0
+}
+
+if [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then
+  usage
 fi
 
-function main(){
-    if $cygwin; then
-        # Workaround for issue involving JLine and Cygwin
-        # (see http://sourceforge.net/p/jline/bugs/40/).
-        # If you're using the Mintty terminal emulator in Cygwin, you may need to set the
-        # "Backspace sends ^H" setting in "Keys" section of the Mintty options
-        # (see https://github.com/sbt/sbt/issues/562).
-        stty -icanon min 1 -echo > /dev/null 2>&1
-        export SPARK_SUBMIT_OPTS="$SPARK_SUBMIT_OPTS -Djline.terminal=unix"
-        $FWDIR/bin/spark-submit spark-shell "$@" --class org.apache.spark.repl.Main
-        stty icanon echo > /dev/null 2>&1
-    else
-        export SPARK_SUBMIT_OPTS
-        $FWDIR/bin/spark-submit spark-shell "$@" --class org.apache.spark.repl.Main
-    fi
+source $FWDIR/bin/utils.sh
+SUBMIT_USAGE_FUNCTION=usage
+gatherSparkSubmitOpts "$@"
+
+function main() {
+  if $cygwin; then
+    # Workaround for issue involving JLine and Cygwin
+    # (see http://sourceforge.net/p/jline/bugs/40/).
+    # If you're using the Mintty terminal emulator in Cygwin, you may need to set the
+    # "Backspace sends ^H" setting in "Keys" section of the Mintty options
+    # (see https://github.com/sbt/sbt/issues/562).
+    stty -icanon min 1 -echo > /dev/null 2>&1
+    export SPARK_SUBMIT_OPTS="$SPARK_SUBMIT_OPTS -Djline.terminal=unix"
+    $FWDIR/bin/spark-submit --class org.apache.spark.repl.Main "${SUBMISSION_OPTS[@]}" spark-shell "${APPLICATION_OPTS[@]}"
+    stty icanon echo > /dev/null 2>&1
+  else
+    export SPARK_SUBMIT_OPTS
+    $FWDIR/bin/spark-submit --class org.apache.spark.repl.Main "${SUBMISSION_OPTS[@]}" spark-shell "${APPLICATION_OPTS[@]}"
+  fi
 }
 
 # Copy restore-TTY-on-exit functions from Scala script so spark-shell exits properly even in
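
With the split in place, recognized options still reach spark-submit and
everything else flows through to the REPL. A hypothetical walk-through
(-i init.scala here is just an example of an unrecognized option):

  ./bin/spark-shell --master local[4] -i init.scala
  # gatherSparkSubmitOpts yields, roughly:
  #   SUBMISSION_OPTS=(--master "local[4]")   # recognized option plus its value
  #   APPLICATION_OPTS=(-i init.scala)        # passed through to the REPL
  # so main() ends up invoking:
  #   spark-submit --class org.apache.spark.repl.Main --master local[4] spark-shell -i init.scala

The spark-shell.cmd hunk below applies the same reordering on Windows.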

http://git-wip-us.apache.org/repos/asf/spark/blob/8dd7690e/bin/spark-shell.cmd
----------------------------------------------------------------------
diff --git a/bin/spark-shell.cmd b/bin/spark-shell.cmd
index 4b9708a..b77154c 100755
--- a/bin/spark-shell.cmd
+++ b/bin/spark-shell.cmd
@@ -19,4 +19,4 @@ rem
 
 set SPARK_HOME=%~dp0..
 
-cmd /V /E /C %SPARK_HOME%\bin\spark-submit.cmd spark-shell %* --class org.apache.spark.repl.Main
+cmd /V /E /C %SPARK_HOME%\bin\spark-submit.cmd %* --class org.apache.spark.repl.Main spark-shell

http://git-wip-us.apache.org/repos/asf/spark/blob/8dd7690e/bin/utils.sh
----------------------------------------------------------------------
diff --git a/bin/utils.sh b/bin/utils.sh
new file mode 100644
index 0000000..0804b1e
--- /dev/null
+++ b/bin/utils.sh
@@ -0,0 +1,59 @@
+#!/usr/bin/env bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Gather all spark-submit options into SUBMISSION_OPTS
+function gatherSparkSubmitOpts() {
+
+  if [ -z "$SUBMIT_USAGE_FUNCTION" ]; then
+    echo "Function for printing usage of $0 is not set." 1>&2
+    echo "Please set usage function to shell variable 'SUBMIT_USAGE_FUNCTION' in $0" 1>&2
+    exit 1
+  fi
+
+  # NOTE: If you add or remove spark-submit options,
+  # modify NOT ONLY this script but also SparkSubmitArguments.scala
+  SUBMISSION_OPTS=()
+  APPLICATION_OPTS=()
+  while (($#)); do
+    case "$1" in
+      --master | --deploy-mode | --class | --name | --jars | --py-files | --files | \
+      --conf | --properties-file | --driver-memory | --driver-java-options | \
+      --driver-library-path | --driver-class-path | --executor-memory | --driver-cores | \
+      --total-executor-cores | --executor-cores | --queue | --num-executors | --archives)
+        if [[ $# -lt 2 ]]; then
+          "$SUBMIT_USAGE_FUNCTION"
+          exit 1;
+        fi
+        SUBMISSION_OPTS+=("$1"); shift
+        SUBMISSION_OPTS+=("$1"); shift
+        ;;
+
+      --verbose | -v | --supervise)
+        SUBMISSION_OPTS+=("$1"); shift
+        ;;
+
+      *)
+        APPLICATION_OPTS+=("$1"); shift
+        ;;
+    esac
+  done
+
+  export SUBMISSION_OPTS
+  export APPLICATION_OPTS
+}
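
A minimal caller sketch for the function above, assuming it runs from the
Spark root (the usage function body and the sample arguments are illustrative
only):

  #!/usr/bin/env bash
  function usage() { echo "Usage: demo [options]" 1>&2; exit 1; }
  SUBMIT_USAGE_FUNCTION=usage
  source ./bin/utils.sh
  gatherSparkSubmitOpts --master local[2] --name demo my_app.py arg1
  echo "submit opts: ${SUBMISSION_OPTS[@]}"    # --master local[2] --name demo
  echo "app opts:    ${APPLICATION_OPTS[@]}"   # my_app.py arg1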

http://git-wip-us.apache.org/repos/asf/spark/blob/8dd7690e/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
index 4aab5a7..a583f19 100644
--- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
@@ -206,6 +206,10 @@ private[spark] class SparkSubmitArguments(args: Seq[String]) {
 
     parse(opts)
 
+    /**
+     * NOTE: If you add or remove spark-submit options,
+     * modify NOT ONLY this file but also utils.sh
+     */
     def parse(opts: Seq[String]): Unit = opts match {
       case ("--name") :: value :: tail =>
         name = value

http://git-wip-us.apache.org/repos/asf/spark/blob/8dd7690e/python/pyspark/java_gateway.py
----------------------------------------------------------------------
diff --git a/python/pyspark/java_gateway.py b/python/pyspark/java_gateway.py
index 91ae826..2b336e5 100644
--- a/python/pyspark/java_gateway.py
+++ b/python/pyspark/java_gateway.py
@@ -38,7 +38,7 @@ def launch_gateway():
         submit_args = os.environ.get("PYSPARK_SUBMIT_ARGS")
         submit_args = submit_args if submit_args is not None else ""
         submit_args = shlex.split(submit_args)
-        command = [os.path.join(SPARK_HOME, script), "pyspark-shell"] + submit_args
+        command = [os.path.join(SPARK_HOME, script)] + submit_args + ["pyspark-shell"]
         if not on_windows:
             # Don't send ctrl-c / SIGINT to the Java gateway:
             def preexec_func():
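
Mirroring the shell-side fix, the gateway now appends the "pyspark-shell"
primary resource after the re-split submit options, so the launched command
looks like (hypothetical options):

  $SPARK_HOME/bin/spark-submit --master local[2] --name "my app" pyspark-shell

rather than the old ordering with "pyspark-shell" first, which made
spark-submit treat every option as an application argument.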


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org