You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by pr...@apache.org on 2016/07/04 06:57:11 UTC

svn commit: r1751218 - /pig/branches/spark/bin/pig

Author: praveen
Date: Mon Jul  4 06:57:11 2016
New Revision: 1751218

URL: http://svn.apache.org/viewvc?rev=1751218&view=rev
Log:
PIG-4903: Avoid add all spark dependency jars to SPARK_YARN_DIST_FILES and SPARK_DIST_CLASSPATH (Liyun via Praveen)

Modified:
    pig/branches/spark/bin/pig

Modified: pig/branches/spark/bin/pig
URL: http://svn.apache.org/viewvc/pig/branches/spark/bin/pig?rev=1751218&r1=1751217&r2=1751218&view=diff
==============================================================================
--- pig/branches/spark/bin/pig (original)
+++ pig/branches/spark/bin/pig Mon Jul  4 06:57:11 2016
@@ -57,6 +57,28 @@ remaining=()
 includeHCatalog="";
 addJarString=-Dpig.additional.jars.uris\=;
 additionalJars="";
+preArg="";
+isSparkMode=1;
+isSparkLocalMode=1;
+
+# verifyMode(preArg, execType, expectExecType): print 0 on stdout when $preArg is "-x" or "-exectype"
+# and $execType, converted to upper case, equals $expectExecType; print 1 in every other case.
+function verifyMode(){
+  local preArg=$1 execType=$2 expectExecType=$3
+  local execTypeUpperCase
+  if [[ "$preArg" == "-x" || "$preArg" == "-exectype" ]]; then
+    # Quote the tr sets and the value so the shell can neither glob-expand nor word-split them.
+    execTypeUpperCase=$(printf '%s\n' "$execType" | tr '[:lower:]' '[:upper:]')
+    if [[ "$execTypeUpperCase" == "$expectExecType" ]]; then
+        echo 0
+    else
+        echo 1
+    fi
+  else
+    echo 1
+  fi
+}
+
 # filter command line parameter
 for f in "$@"; do
      if [[ $f == "-secretDebugCmd" || $f == "-printCmdDebug" ]]; then
@@ -70,9 +92,16 @@ for f in "$@"; do
         includeHCatalog=true;
       elif [[ "$includeHCatalog" == "true" && $f == $addJarString* ]]; then
         additionalJars=`echo $f | sed s/$addJarString//`
+      elif [[ $(verifyMode $preArg $f "SPARK") -eq 0 ]]; then
+        isSparkMode=0 
+        remaining[${#remaining[@]}]="$f"
+      elif [[ $(verifyMode $preArg $f "SPARK_LOCAL") -eq 0 ]]; then
+        isSparkLocalMode=0
+        remaining[${#remaining[@]}]="$f"
       else
         remaining[${#remaining[@]}]="$f"
      fi
+     preArg=$f
 done
 
 # resolve links - $0 may be a softlink
@@ -393,6 +422,39 @@ export SPARK_JARS=${SPARK_YARN_DIST_FILE
 export SPARK_DIST_CLASSPATH
 ################# ADDING SPARK DEPENDENCIES ##################
 
+################# ADDING SPARK DEPENDENCIES ##################
+# For spark_local mode:
+if [ "$isSparkLocalMode" -eq 0 ]; then
+	# SPARK_MASTER is forced to be "local" in spark_local mode
+	SPARK_MASTER="local"
+	for f in "$PIG_HOME"/lib/spark/*.jar; do
+		CLASSPATH=${CLASSPATH}:$f
+	done
+fi
+
+# For spark mode:
+# Please specify SPARK_HOME first so that we can locate $SPARK_HOME/lib/spark-assembly*.jar,
+# we will add spark-assembly*.jar to the classpath.
+if [ "$isSparkMode" -eq 0 ]; then
+	if [ -z "$SPARK_HOME" ]; then
+	   echo "Error: SPARK_HOME is not set!" >&2
+	   exit 1
+	fi
+
+	# Please specify SPARK_JAR which is the hdfs path of spark-assembly*.jar to allow YARN to cache spark-assembly*.jar on nodes so that it doesn't need to be distributed each time an application runs.
+	if [ -z "$SPARK_JAR" ]; then
+	   echo "Error: SPARK_JAR is not set, SPARK_JAR stands for the hdfs location of spark-assembly*.jar. This allows YARN to cache spark-assembly*.jar on nodes so that it doesn't need to be distributed each time an application runs." >&2
+	   exit 1
+	fi
+
+	# SPARK_HOME is non-empty here (checked above). Glob instead of parsing ls so that each match becomes its own classpath entry.
+	echo "Using Spark Home: " "${SPARK_HOME}"
+	for SPARK_ASSEMBLY_JAR in "${SPARK_HOME}"/lib/spark-assembly*; do
+	    CLASSPATH=${CLASSPATH}:$SPARK_ASSEMBLY_JAR
+	done
+fi
+################# ADDING SPARK DEPENDENCIES ##################
+
 # run it
 if [ -n "$HADOOP_BIN" ]; then
     if [ "$debug" == "true" ]; then