Posted to commits@spark.apache.org by ad...@apache.org on 2014/03/30 04:51:23 UTC

git commit: [SPARK-1186] : Enrich the Spark Shell to support additional arguments.

Repository: spark
Updated Branches:
  refs/heads/master af3746ce0 -> fda86d8b4


[SPARK-1186] : Enrich the Spark Shell to support additional arguments.

Enrich the Spark Shell functionality to support the following options.

```
Usage: spark-shell [OPTIONS]

OPTIONS:
    -h  --help              : Print this help information.
    -c  --cores             : The maximum number of cores to be used by the Spark Shell.
    -em --executor-memory   : The memory used by each executor of the Spark Shell; the number
                              is followed by "m" for megabytes or "g" for gigabytes, e.g. "1g".
    -dm --driver-memory     : The memory used by the Spark Shell driver; the number is followed
                              by "m" for megabytes or "g" for gigabytes, e.g. "1g".
    -m  --master            : The master URL of the Spark cluster, e.g. "spark://localhost:7077";
                              defaults to "local".
    --log-conf              : Enables logging of the supplied SparkConf as INFO when the
                              SparkContext starts.

e.g.
    spark-shell -m spark://localhost:7077 -c 4 -dm 512m -em 2g
```
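
These flags are thin wrappers over Spark system properties and environment variables. As a rough sketch (the property and variable names are taken from the patch below), the example above expands to:

```
# Rough expansion of: spark-shell -m spark://localhost:7077 -c 4 -dm 512m -em 2g
MASTER="spark://localhost:7077"                                  # -m  / --master
export SPARK_DRIVER_MEMORY=512m                                  # -dm / --driver-memory
SPARK_REPL_OPTS="$SPARK_REPL_OPTS -Dspark.cores.max=4"           # -c  / --cores
SPARK_REPL_OPTS="$SPARK_REPL_OPTS -Dspark.executor.memory=2g"    # -em / --executor-memory
# --log-conf would additionally append -Dspark.logConf=true
```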

**Note**: this commit reflects the changes applied to _master_ based on [5d98cfc1].

[ticket: SPARK-1186] : Enrich the Spark Shell to support additional arguments.
                        https://spark-project.atlassian.net/browse/SPARK-1186
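
With these options the master is resolved in a fixed order: an explicit `-m/--master` argument wins, then a pre-set `MASTER` environment variable, then `SPARK_MASTER_IP`/`SPARK_MASTER_PORT` from spark-env, and finally the `"local"` default. Condensed from `resolve_spark_master` in the patch below:

```
if [ -z "$MASTER" ]; then                        # no -m flag and no MASTER env var
    . $FWDIR/bin/load-spark-env.sh               # may define SPARK_MASTER_IP/PORT
    if [ -n "$SPARK_MASTER_IP" ]; then
        export MASTER="spark://${SPARK_MASTER_IP}:${SPARK_MASTER_PORT:-7077}"
    fi
fi
MASTER="${MASTER:-local}"                        # last resort: the "local" default
```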

Author      : bernardo.gomezpalcio@gmail.com

Author: Bernardo Gomez Palacio <be...@gmail.com>

Closes #116 from berngp/feature/enrich-spark-shell and squashes the following commits:

c5f455f [Bernardo Gomez Palacio] [SPARK-1186] : Enrich the Spark Shell to support additional arguments.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/fda86d8b
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/fda86d8b
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/fda86d8b

Branch: refs/heads/master
Commit: fda86d8b46a1cc484d11ac5446d8cc2a86429b9b
Parents: af3746c
Author: Bernardo Gomez Palacio <be...@gmail.com>
Authored: Sat Mar 29 19:49:22 2014 -0700
Committer: Aaron Davidson <aa...@databricks.com>
Committed: Sat Mar 29 19:49:22 2014 -0700

----------------------------------------------------------------------
 bin/spark-shell | 226 ++++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 168 insertions(+), 58 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/fda86d8b/bin/spark-shell
----------------------------------------------------------------------
diff --git a/bin/spark-shell b/bin/spark-shell
index 861ab60..fac006c 100755
--- a/bin/spark-shell
+++ b/bin/spark-shell
@@ -30,67 +30,189 @@ esac
 # Enter posix mode for bash
 set -o posix
 
-CORE_PATTERN="^[0-9]+$"
-MEM_PATTERN="^[0-9]+[m|g|M|G]$"
-
+## Global script variables
 FWDIR="$(cd `dirname $0`/..; pwd)"
 
-if [ "$1" = "--help" ] || [ "$1" = "-h" ]; then
-	echo "Usage: spark-shell [OPTIONS]"
-	echo "OPTIONS:"
-	echo "-c --cores num, the maximum number of cores to be used by the spark shell"
-	echo "-em --execmem num[m|g], the memory used by each executor of spark shell"
-	echo "-dm --drivermem num[m|g], the memory used by the spark shell and driver"
-	echo "-h --help, print this help information" 
-	exit
-fi
+SPARK_REPL_OPTS="${SPARK_REPL_OPTS:-""}"
+DEFAULT_MASTER="local"
+MASTER=${MASTER:-""}
+
+info_log=0
+
+#CLI Color Templates
+txtund=$(tput sgr 0 1)          # Underline
+txtbld=$(tput bold)             # Bold
+bldred=${txtbld}$(tput setaf 1) # red
+bldyel=${txtbld}$(tput setaf 3) # yellow
+bldblu=${txtbld}$(tput setaf 4) # blue
+bldwht=${txtbld}$(tput setaf 7) # white
+txtrst=$(tput sgr0)             # Reset
+info=${bldwht}*${txtrst}        # Feedback
+pass=${bldblu}*${txtrst}
+warn=${bldred}*${txtrst}
+ques=${bldblu}?${txtrst}
+
+# Helper function to describe the script usage
+function usage() {
+    cat << EOF
+${txtbld}Usage${txtrst}: spark-shell [OPTIONS]
+
+${txtbld}OPTIONS${txtrst}:
+    -h  --help              : Print this help information.
+    -c  --cores             : The maximum number of cores to be used by the Spark Shell.
+    -em --executor-memory   : The memory used by each executor of the Spark Shell; the number
+                              is followed by "m" for megabytes or "g" for gigabytes, e.g. "1g".
+    -dm --driver-memory     : The memory used by the Spark Shell driver; the number is followed
+                              by "m" for megabytes or "g" for gigabytes, e.g. "1g".
+    -m  --master            : The master URL of the Spark cluster, e.g. "spark://localhost:7077";
+                              defaults to "local".
+    --log-conf              : Enables logging of the supplied SparkConf as INFO when the
+                              SparkContext starts.
+
+e.g.
+    spark-shell -m spark://localhost:7077 -c 4 -dm 512m -em 2g
+
+EOF
+}
+
+function out_error(){
+    echo -e "${txtund}${bldred}ERROR${txtrst}: $1"
+    usage
+    exit 1
+}
+
+function log_info(){
+    [ $info_log -eq 1 ] && echo -e "${bldyel}INFO${txtrst}: $1"
+}
+
+function log_warn(){
+    echo -e "${txtund}${bldyel}WARN${txtrst}: $1"
+}
 
-for o in "$@"; do
-  if [ "$1" = "-c" -o "$1" = "--cores" ]; then
-    shift
+# PATTERNS used to validate more than one optional arg.
+ARG_FLAG_PATTERN="^-"
+MEM_PATTERN="^[0-9]+[mgMG]$"
+NUM_PATTERN="^[0-9]+$"
+PORT_PATTERN="^[0-9]+$"
+
+# Setters for optional args.
+function set_cores(){
+    CORE_PATTERN="^[0-9]+$"
     if [[ "$1" =~ $CORE_PATTERN ]]; then
-      SPARK_REPL_OPTS="$SPARK_REPL_OPTS -Dspark.cores.max=$1"
-      shift
+        SPARK_REPL_OPTS="$SPARK_REPL_OPTS -Dspark.cores.max=$1"
     else
-      echo "ERROR: wrong format for -c/--cores"
-      exit 1
+        out_error "wrong format for $2"
     fi
-  fi
-  if [ "$1" = "-em" -o "$1" = "--execmem" ]; then
-    shift
+}
+
+function set_em(){
     if [[ $1 =~ $MEM_PATTERN ]]; then
       SPARK_REPL_OPTS="$SPARK_REPL_OPTS -Dspark.executor.memory=$1"
-      shift
     else
-      echo "ERROR: wrong format for --execmem/-em"
-      exit 1
+      out_error "wrong format for $2"
     fi
-  fi
-  if [ "$1" = "-dm" -o "$1" = "--drivermem" ]; then
-    shift
+}
+
+function set_dm(){
     if [[ $1 =~ $MEM_PATTERN ]]; then
       export SPARK_DRIVER_MEMORY=$1
-      shift
     else
-      echo "ERROR: wrong format for --drivermem/-dm"
-      exit 1
+      out_error "wrong format for $2"
     fi
-  fi
-done
+}
+
+function set_spark_log_conf(){
+    SPARK_REPL_OPTS="$SPARK_REPL_OPTS -Dspark.logConf=$1"
+}
 
-# Set MASTER from spark-env if possible
-DEFAULT_SPARK_MASTER_PORT=7077
-if [ -z "$MASTER" ]; then
-  . $FWDIR/bin/load-spark-env.sh
-  if [ "x" != "x$SPARK_MASTER_IP" ]; then
-    if [ "y" != "y$SPARK_MASTER_PORT" ]; then
-      SPARK_MASTER_PORT="${SPARK_MASTER_PORT}"
+function set_spark_master(){
+    if ! [[ "$1" =~ $ARG_FLAG_PATTERN ]]; then
+        MASTER="$1"
     else
-      SPARK_MASTER_PORT=$DEFAULT_SPARK_MASTER_PORT
+        out_error "wrong format for $2"
+    fi
+}
+
+function resolve_spark_master(){
+    # Set MASTER from spark-env if possible
+    DEFAULT_SPARK_MASTER_PORT=7077
+    if [ -z "$MASTER" ]; then
+        . $FWDIR/bin/load-spark-env.sh
+        if [ -n "$SPARK_MASTER_IP" ]; then
+            SPARK_MASTER_PORT="${SPARK_MASTER_PORT:-"$DEFAULT_SPARK_MASTER_PORT"}"
+            export MASTER="spark://${SPARK_MASTER_IP}:${SPARK_MASTER_PORT}"
+        fi
+    fi
+
+    if [ -z "$MASTER" ]; then
+        MASTER="$DEFAULT_MASTER"
     fi
-    export MASTER="spark://${SPARK_MASTER_IP}:${SPARK_MASTER_PORT}"
-  fi
-fi
+
+}
+
+function main(){
+    log_info "Base Directory set to $FWDIR"
+    
+    resolve_spark_master
+    log_info "Spark Master is $MASTER"
+
+    log_info "Spark REPL options  $SPARK_REPL_OPTS"
+    if $cygwin; then
+        # Workaround for issue involving JLine and Cygwin
+        # (see http://sourceforge.net/p/jline/bugs/40/).
+        # If you're using the Mintty terminal emulator in Cygwin, you may need to set the
+        # "Backspace sends ^H" setting in "Keys" section of the Mintty options
+        # (see https://github.com/sbt/sbt/issues/562).
+        stty -icanon min 1 -echo > /dev/null 2>&1
+        export SPARK_REPL_OPTS="$SPARK_REPL_OPTS -Djline.terminal=unix"
+        $FWDIR/bin/spark-class org.apache.spark.repl.Main "$@"
+        stty icanon echo > /dev/null 2>&1
+    else
+        export SPARK_REPL_OPTS
+        $FWDIR/bin/spark-class org.apache.spark.repl.Main "$@"
+    fi
+}
+
+for option in "$@"
+do
+     case $option in
+         -h  | --help )
+             usage
+             exit 1
+             ;;
+         -c  | --cores)
+             shift
+             _1=$1
+             shift
+             set_cores $_1 "-c/--cores"
+             ;;
+         -em | --executor-memory)
+             shift
+             _1=$1
+             shift
+             set_em $_1 "-em/--executor-memory"
+             ;;
+         -dm | --driver-memory)
+             shift
+             _1=$1
+             shift
+             set_dm $_1 "-dm/--driver-memory"
+             ;;
+         -m | --master)
+             shift
+             _1=$1
+             shift
+             set_spark_master $_1 "-m/--master"
+             ;;
+         --log-conf)
+             shift
+             set_spark_log_conf "true"
+             info_log=1
+             ;;
+         ?)
+             ;;
+     esac
+done
 
 # Copy restore-TTY-on-exit functions from Scala script so spark-shell exits properly even in
 # binary distribution of Spark where Scala is not installed
@@ -120,22 +242,10 @@ if [[ ! $? ]]; then
   saved_stty=""
 fi
 
-if $cygwin; then
-  # Workaround for issue involving JLine and Cygwin
-  # (see http://sourceforge.net/p/jline/bugs/40/).
-  # If you're using the Mintty terminal emulator in Cygwin, may need to set the
-  # "Backspace sends ^H" setting in "Keys" section of the Mintty options
-  # (see https://github.com/sbt/sbt/issues/562).
-  stty -icanon min 1 -echo > /dev/null 2>&1
-  export SPARK_REPL_OPTS="$SPARK_REPL_OPTS -Djline.terminal=unix"
-  $FWDIR/bin/spark-class org.apache.spark.repl.Main "$@"
-  stty icanon echo > /dev/null 2>&1
-else
-  export SPARK_REPL_OPTS
-  $FWDIR/bin/spark-class org.apache.spark.repl.Main "$@"
-fi
+main "$@"
 
 # record the exit status lest it be overwritten:
 # then reenable echo and propagate the code.
 exit_status=$?
 onExit
+
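
For completeness, a few equivalent ways to start the shell against a standalone master with the new flags; the host and port below are illustrative:

```
# Flag form, new in this patch:
./bin/spark-shell -m spark://localhost:7077 -c 4 -em 2g -dm 512m

# Environment form, unchanged: MASTER still takes effect when -m is absent.
MASTER=spark://localhost:7077 ./bin/spark-shell

# If spark-env sets SPARK_MASTER_IP, the port falls back to 7077.
```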