You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by pr...@apache.org on 2016/07/04 06:57:11 UTC
svn commit: r1751218 - /pig/branches/spark/bin/pig
Author: praveen
Date: Mon Jul 4 06:57:11 2016
New Revision: 1751218
URL: http://svn.apache.org/viewvc?rev=1751218&view=rev
Log:
PIG-4903: Avoid add all spark dependency jars to SPARK_YARN_DIST_FILES and SPARK_DIST_CLASSPATH (Liyun via Praveen)
Modified:
pig/branches/spark/bin/pig
Modified: pig/branches/spark/bin/pig
URL: http://svn.apache.org/viewvc/pig/branches/spark/bin/pig?rev=1751218&r1=1751217&r2=1751218&view=diff
==============================================================================
--- pig/branches/spark/bin/pig (original)
+++ pig/branches/spark/bin/pig Mon Jul 4 06:57:11 2016
@@ -57,6 +57,28 @@ remaining=()
includeHCatalog="";
addJarString=-Dpig.additional.jars.uris\=;
additionalJars="";
+preArg="";
+isSparkMode=1;
+isSparkLocalMode=1;
+
+#verifyMode(preArg, execType, expectExecType): check whether $preArg is "-x" or "-exectype"; if yes,
+#continue to verify whether $execType equals $expectExecType
+function verifyMode(){
+ preArg=$1
+ execType=$2
+ expectExecType=$3
+ if [[ "$preArg" == "-x" || "$preArg" == "-exectype" ]]; then
+ execTypeUpperCase=$(echo $execType |tr [a-z] [A-Z])
+ if [[ "$execTypeUpperCase" == "$expectExecType" ]]; then
+ echo 0
+ else
+ echo 1
+ fi
+ else
+ echo 1
+ fi
+}
+
# filter command line parameter
for f in "$@"; do
if [[ $f == "-secretDebugCmd" || $f == "-printCmdDebug" ]]; then
@@ -70,9 +92,16 @@ for f in "$@"; do
includeHCatalog=true;
elif [[ "$includeHCatalog" == "true" && $f == $addJarString* ]]; then
additionalJars=`echo $f | sed s/$addJarString//`
+ elif [[ $(verifyMode $preArg $f "SPARK") -eq 0 ]]; then
+ isSparkMode=0
+ remaining[${#remaining[@]}]="$f"
+ elif [[ $(verifyMode $preArg $f "SPARK_LOCAL") -eq 0 ]]; then
+ isSparkLocalMode=0
+ remaining[${#remaining[@]}]="$f"
else
remaining[${#remaining[@]}]="$f"
fi
+ preArg=$f
done
# resolve links - $0 may be a softlink
@@ -393,6 +422,39 @@ export SPARK_JARS=${SPARK_YARN_DIST_FILE
export SPARK_DIST_CLASSPATH
################# ADDING SPARK DEPENDENCIES ##################
+################# ADDING SPARK DEPENDENCIES ##################
+# For spark_local mode:
+if [ $isSparkLocalMode -eq 0 ]; then
+#SPARK_MASTER is forced to be "local" in spark_local mode
+ SPARK_MASTER="local"
+ for f in $PIG_HOME/lib/spark/*.jar; do
+ CLASSPATH=${CLASSPATH}:$f;
+ done
+fi
+
+# For spark mode:
+# Please specify SPARK_HOME first so that we can locate $SPARK_HOME/lib/spark-assembly*.jar,
+# we will add spark-assembly*.jar to the classpath.
+if [ $isSparkMode -eq 0 ]; then
+ if [ -z "$SPARK_HOME" ]; then
+ echo "Error: SPARK_HOME is not set!"
+ exit 1
+ fi
+
+ # Please specify SPARK_JAR which is the hdfs path of spark-assembly*.jar to allow YARN to cache spark-assembly*.jar on nodes so that it doesn't need to be distributed each time an application runs.
+ if [ -z "$SPARK_JAR" ]; then
+ echo "Error: SPARK_JAR is not set, SPARK_JAR stands for the hdfs location of spark-assembly*.jar. This allows YARN to cache spark-assembly*.jar on nodes so that it doesn't need to be distributed each time an application runs."
+ exit 1
+ fi
+
+ if [ -n "$SPARK_HOME" ]; then
+ echo "Using Spark Home: " ${SPARK_HOME}
+ SPARK_ASSEMBLY_JAR=`ls ${SPARK_HOME}/lib/spark-assembly*`
+ CLASSPATH=${CLASSPATH}:$SPARK_ASSEMBLY_JAR
+ fi
+fi
+################# ADDING SPARK DEPENDENCIES ##################
+
# run it
if [ -n "$HADOOP_BIN" ]; then
if [ "$debug" == "true" ]; then