You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by zl...@apache.org on 2017/07/25 01:54:55 UTC

svn commit: r1802880 - in /pig/trunk: CHANGES.txt bin/pig

Author: zly
Date: Tue Jul 25 01:54:54 2017
New Revision: 1802880

URL: http://svn.apache.org/viewvc?rev=1802880&view=rev
Log:
PIG-5246: Modify bin/pig about SPARK_HOME, SPARK_ASSEMBLY_JAR after upgrading spark to 2 (liyunzhang)

Modified:
    pig/trunk/CHANGES.txt
    pig/trunk/bin/pig

Modified: pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1802880&r1=1802879&r2=1802880&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Tue Jul 25 01:54:54 2017
@@ -38,6 +38,8 @@ OPTIMIZATIONS
  
 BUG FIXES
 
+PIG-5246: Modify bin/pig about SPARK_HOME, SPARK_ASSEMBLY_JAR after upgrading spark to 2 (liyunzhang)
+
 PIG-3655: BinStorage and InterStorage approach to record markers is broken (szita)
 
 PIG-5274: TestEvalPipelineLocal#testSetLocationCalledInFE is failing in spark mode after PIG-5157 (nkollar via szita)

Modified: pig/trunk/bin/pig
URL: http://svn.apache.org/viewvc/pig/trunk/bin/pig?rev=1802880&r1=1802879&r2=1802880&view=diff
==============================================================================
--- pig/trunk/bin/pig (original)
+++ pig/trunk/bin/pig Tue Jul 25 01:54:54 2017
@@ -60,6 +60,7 @@ additionalJars="";
 prevArgExecType=false;
 isSparkMode=false;
 isSparkLocalMode=false;
+sparkversion=2;
 
 #verify the execType is SPARK or SPARK_LOCAL or not
 function processExecType(){
@@ -402,18 +403,34 @@ if [ "$isSparkMode"  == "true" ]; then
        echo "Error: SPARK_HOME is not set!"
        exit 1
     fi
-
-    # Please specify SPARK_JAR which is the hdfs path of spark-assembly*.jar to allow YARN to cache spark-assembly*.jar on nodes so that it doesn't need to be distributed each time an application runs.
-    if [ -z "$SPARK_JAR" ]; then
-       echo "Error: SPARK_JAR is not set, SPARK_JAR stands for the hdfs location of spark-assembly*.jar. This allows YARN to cache spark-assembly*.jar on nodes so that it doesn't need to be distributed each time an application runs."
-       exit 1
-    fi
-
-    if [ -n "$SPARK_HOME" ]; then
-        echo "Using Spark Home: " ${SPARK_HOME}
-        SPARK_ASSEMBLY_JAR=`ls ${SPARK_HOME}/lib/spark-assembly*`
-        CLASSPATH=${CLASSPATH}:$SPARK_ASSEMBLY_JAR
-    fi
+    # spark-tags*.jar ships with Spark 2 but not with Spark 1, so we use its presence under SPARK_HOME to tell whether the current Spark is version 1 or 2.
+    SPARK_TAG_JAR=`find $SPARK_HOME -name 'spark-tags*.jar'|wc -l`
+    if [ "$SPARK_TAG_JAR" -eq 0 ];then 
+          sparkversion="1"
+    fi
+    if [ "$sparkversion" == "1" ]; then
+        # Please specify SPARK_JAR which is the hdfs path of spark-assembly*.jar to allow YARN to cache spark-assembly*.jar on nodes so that it doesn't need to be distributed each time an application runs.
+        if [ -z "$SPARK_JAR" ]; then
+           echo "Error: SPARK_JAR is not set, SPARK_JAR stands for the hdfs location of spark-assembly*.jar. This
+        allows YARN to cache spark-assembly*.jar on nodes so that it doesn't need to be distributed each time an application runs."
+           exit 1
+        fi
+
+        if [ -n "$SPARK_HOME" ]; then
+            echo "Using Spark Home: " ${SPARK_HOME}
+            SPARK_ASSEMBLY_JAR=`ls ${SPARK_HOME}/lib/spark-assembly*`
+            CLASSPATH=${CLASSPATH}:$SPARK_ASSEMBLY_JAR
+        fi
+    fi
+
+    if [ "$sparkversion" == "2" ]; then
+          if [ -n "$SPARK_HOME" ]; then
+              echo "Using Spark Home: " ${SPARK_HOME}
+              for f in $SPARK_HOME/jars/*.jar; do
+                   CLASSPATH=${CLASSPATH}:$f
+              done
+          fi
+     fi
 fi
 
 #spark-assembly.jar contains jcl-over-slf4j which would create a LogFactory implementation that is incompatible