You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by da...@apache.org on 2014/10/30 22:15:36 UTC

svn commit: r1635615 - in /pig/branches/branch-0.14: CHANGES.txt bin/pig bin/pig.cmd src/docs/src/documentation/content/xdocs/basic.xml src/org/apache/pig/Main.java src/org/apache/pig/PigServer.java src/org/apache/pig/impl/io/FileLocalizer.java

Author: daijy
Date: Thu Oct 30 21:15:36 2014
New Revision: 1635615

URL: http://svn.apache.org/r1635615
Log:
PIG-4160: Provide a way to pass local jars in pig.additional.jars when using a remote url for a script

Modified:
    pig/branches/branch-0.14/CHANGES.txt
    pig/branches/branch-0.14/bin/pig
    pig/branches/branch-0.14/bin/pig.cmd
    pig/branches/branch-0.14/src/docs/src/documentation/content/xdocs/basic.xml
    pig/branches/branch-0.14/src/org/apache/pig/Main.java
    pig/branches/branch-0.14/src/org/apache/pig/PigServer.java
    pig/branches/branch-0.14/src/org/apache/pig/impl/io/FileLocalizer.java

Modified: pig/branches/branch-0.14/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.14/CHANGES.txt?rev=1635615&r1=1635614&r2=1635615&view=diff
==============================================================================
--- pig/branches/branch-0.14/CHANGES.txt (original)
+++ pig/branches/branch-0.14/CHANGES.txt Thu Oct 30 21:15:36 2014
@@ -24,6 +24,9 @@ INCOMPATIBLE CHANGES
  
 IMPROVEMENTS
 
+PIG-4160: Provide a way to pass local jars in pig.additional.jars when using a remote
+ url for a script (acoliver via daijy)
+
 PIG-4246: HBaseStorage should implement getShipFiles (rohini)
 
 PIG-3456: Reduce threadlocal conf access in backend for each record (rohini)

Modified: pig/branches/branch-0.14/bin/pig
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.14/bin/pig?rev=1635615&r1=1635614&r2=1635615&view=diff
==============================================================================
--- pig/branches/branch-0.14/bin/pig (original)
+++ pig/branches/branch-0.14/bin/pig Thu Oct 30 21:15:36 2014
@@ -55,7 +55,7 @@ debug=false
 
 remaining=()
 includeHCatalog="";
-addJarString=-Dpig.additional.jars\=;
+addJarString=-Dpig.additional.jars.comma\=;
 additionalJars="";
 # filter command line parameter
 for f in "$@"; do
@@ -187,11 +187,12 @@ if [ "$includeHCatalog" == "true" ]; the
   # get the pig storage handler jar
   pigHCatJarPath=`ls $HCAT_HOME/share/hcatalog/${pigHCatJar}`
 
-  ADDITIONAL_CLASSPATHS=$hiveMetaStoreVersion:$thriftVersion:$hiveExecVersion:$fbJarVersion:$jdoECJarVersion:$slfJarVersion:$hbaseHiveVersion:$hcatJarPath:$hbaseHCatJarPath:$pigHCatJarPath
+  HCAT_CLASSPATHS=$hiveMetaStoreVersion:$thriftVersion:$hiveExecVersion:$fbJarVersion:$jdoECJarVersion:$slfJarVersion:$hbaseHiveVersion:$hcatJarPath:$hbaseHCatJarPath:$pigHCatJarPath
+  ADDITIONAL_CLASSPATHS=file://$hiveMetaStoreVersion,file://$thriftVersion,file://$hiveExecVersion,file://$fbJarVersion,file://$jdoECJarVersion,file://$slfJarVersion,file://$hbaseHiveVersion,file://$hcatJarPath,file://$hbaseHCatJarPath,file://$pigHCatJarPath
   if [ "$additionalJars" != "" ]; then
     ADDITIONAL_CLASSPATHS=$ADDITIONAL_CLASSPATHS:$additionalJars
   fi
-  CLASSPATH=${CLASSPATH}:$ADDITIONAL_CLASSPATHS:$HIVE_HOME/conf
+  CLASSPATH=${CLASSPATH}:$HCAT_CLASSPATHS:$HIVE_HOME/conf
 fi
 
 # Add user-specified CLASSPATH entries via PIG_CLASSPATH
@@ -352,12 +353,12 @@ PIG_OPTS="$PIG_OPTS -Dpig.log.dir=$PIG_L
 PIG_OPTS="$PIG_OPTS -Dpig.log.file=$PIG_LOGFILE"
 PIG_OPTS="$PIG_OPTS -Dpig.home.dir=$PIG_HOME"
 if [ "$includeHCatalog" == "true" ]; then
-  addJars=`echo $PIG_OPTS | awk '{ for (i=1; i<=NF; i++) print $i; }' | grep "\-Dpig.additional.jars=" | sed s/-Dpig.additional.jars=//`
+  addJars=`echo $PIG_OPTS | awk '{ for (i=1; i<=NF; i++) print $i; }' | grep "\-Dpig.additional.jars.comma=" | sed s/-Dpig.additional.jars.comma=//`
   if [ "$addJars" != "" ]; then
     ADDITIONAL_CLASSPATHS=$addJars:$ADDITIONAL_CLASSPATHS
-    PIG_OPTS=`echo $PIG_OPTS | sed 's/-Dpig.additional.jars=[^ ]*//'`
+    PIG_OPTS=`echo $PIG_OPTS | sed 's/-Dpig.additional.jars.comma=[^ ]*//'`
   fi
-  PIG_OPTS="$PIG_OPTS -Dpig.additional.jars=$ADDITIONAL_CLASSPATHS"
+  PIG_OPTS="$PIG_OPTS -Dpig.additional.jars.comma=$ADDITIONAL_CLASSPATHS"
 fi
 
 # run it
@@ -389,7 +390,6 @@ if [ -n "$HADOOP_BIN" ]; then
     done
 
     export HADOOP_CLASSPATH=$CLASSPATH:$HADOOP_CLASSPATH
-    export HADOOP_OPTS="$JAVA_HEAP_MAX $PIG_OPTS $HADOOP_OPTS"
     export HADOOP_CLIENT_OPTS="$JAVA_HEAP_MAX $PIG_OPTS $HADOOP_CLIENT_OPTS"
     if [ "$debug" == "true" ]; then
         echo "dry run:"

Modified: pig/branches/branch-0.14/bin/pig.cmd
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.14/bin/pig.cmd?rev=1635615&r1=1635614&r2=1635615&view=diff
==============================================================================
--- pig/branches/branch-0.14/bin/pig.cmd (original)
+++ pig/branches/branch-0.14/bin/pig.cmd Thu Oct 30 21:15:36 2014
@@ -105,6 +105,7 @@ set PIGARGS=
   )
 
   set HCAT_DEPENDCIES=
+  set HCAT_CLASSPATH=
   if not defined HCAT_FLAG (
     goto HCAT_END
   )
@@ -149,8 +150,8 @@ set PIGARGS=
       echo "HIVE_HOME should be defined"
       exit /b 1
   )
-  set PIG_CLASSPATH=%PIG_CLASSPATH%;%HCAT_DEPENDCIES%;%HIVE_HOME%\conf
-  set PIG_OPTS=%PIG_OPTS% -Dpig.additional.jars=%HCAT_DEPENDCIES%;%PIG_ADDITIONAL_JARS%
+  set PIG_CLASSPATH=%PIG_CLASSPATH%;%HCAT_CLASSPATH%;%HIVE_HOME%\conf
+  set PIG_OPTS=%PIG_OPTS% -Dpig.additional.jars.comma=%HCAT_DEPENDCIES%,%PIG_ADDITIONAL_JARS_COMMA%
 :HCAT_END
 
   if defined PIG_CLASSPATH (
@@ -197,7 +198,8 @@ set PIGARGS=
   :AddJar
     pushd %1
     for /f %%a IN ('dir /b %2') do (
-   	  set HCAT_DEPENDCIES=!HCAT_DEPENDCIES!;%1\%%a
+       set HCAT_CLASSPATH=!HCAT_CLASSPATH!;%1\%%a
+       set HCAT_DEPENDCIES=!HCAT_DEPENDCIES!,file:///%1\%%a
     )
     popd
 :endlocal

Modified: pig/branches/branch-0.14/src/docs/src/documentation/content/xdocs/basic.xml
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.14/src/docs/src/documentation/content/xdocs/basic.xml?rev=1635615&r1=1635614&r2=1635615&view=diff
==============================================================================
--- pig/branches/branch-0.14/src/docs/src/documentation/content/xdocs/basic.xml (original)
+++ pig/branches/branch-0.14/src/docs/src/documentation/content/xdocs/basic.xml Thu Oct 30 21:15:36 2014
@@ -8960,7 +8960,7 @@ B = FOREACH A GENERATE myFunc($0);
    <p id="register-glob">Additionally, JAR files stored in local file systems can be specified as a glob pattern using “*”. Pig will search for matching jars in the local file system, either the relative path (relative to your working directory) or the absolute path. Pig will pick up all JARs that match the glob.</p>
    
    <p><strong>Command Line</strong></p>
-   <p>You can register additional files (to use with your Pig script) via the command line using the -Dpig.additional.jars option. 
+   <p>You can register additional files (to use with your Pig script) via the PIG_OPTS environment variable using the -Dpig.additional.jars.comma option. 
 For more information see <a href="udf.html">User Defined Functions</a>.</p>
    </section>
    
@@ -8975,16 +8975,18 @@ A = LOAD 'students';
 B = FOREACH A GENERATE myfunc.MyEvalFunc($0);
 </source>
    
-<p>In this example additional JAR files are registered via the command line.</p>
+<p>In this example additional JAR files are registered via the PIG_OPTS environment variable.</p>
 <source>
-pig -Dpig.additional.jars=my.jar:your.jar script.pig
+export PIG_OPTS="-Dpig.additional.jars.comma=my.jar,your.jar"
 </source>
 
-<p>In this example a JAR file stored in HDFS is registered.</p>
+<p>In this example a JAR file stored in HDFS and a local JAR file are registered.</p>
 <source>
-pig -Dpig.additional.jars=hdfs://nn.mydomain.com:9020/myjars/my.jar script.pig
+export PIG_OPTS="-Dpig.additional.jars.comma=hdfs://nn.mydomain.com:9020/myjars/my.jar,file:///home/root/pig/your.jar"
 </source>
 
+<p>Note: the legacy property pig.additional.jars, which uses a colon as the separator, is still supported. However, we recommend using pig.additional.jars.comma instead: because a colon also appears in URL schemes, a colon-separated list cannot contain entries with a full scheme. pig.additional.jars will be deprecated in a future release.</p>
+
 <p>This example shows how to specify a glob pattern using either a relative path or an absolute path.</p>
 <source>
 register /homes/user/pig/myfunc*.jar

Modified: pig/branches/branch-0.14/src/org/apache/pig/Main.java
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.14/src/org/apache/pig/Main.java?rev=1635615&r1=1635614&r2=1635615&view=diff
==============================================================================
--- pig/branches/branch-0.14/src/org/apache/pig/Main.java (original)
+++ pig/branches/branch-0.14/src/org/apache/pig/Main.java Thu Oct 30 21:15:36 2014
@@ -937,7 +937,7 @@ public class Main {
             System.out.println("            by this factor, it gets disabled.");
             System.out.println("    Miscellaneous:");
             System.out.println("        exectype=mapreduce|local; default is mapreduce. This property is the same as -x switch");
-            System.out.println("        pig.additional.jars=<colon seperated list of jars>. Used in place of register command.");
+            System.out.println("        pig.additional.jars.comma=<comma seperated list of jars>. Used in place of register command.");
             System.out.println("        udf.import.list=<comma seperated list of imports>. Used to avoid package names in UDF.");
             System.out.println("        stop.on.failure=true|false; default is false. Set to true to terminate on the first error.");
             System.out.println("        pig.datetime.default.tz=<UTC time offset>. e.g. +08:00. Default is the default timezone of the host.");

Modified: pig/branches/branch-0.14/src/org/apache/pig/PigServer.java
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.14/src/org/apache/pig/PigServer.java?rev=1635615&r1=1635614&r2=1635615&view=diff
==============================================================================
--- pig/branches/branch-0.14/src/org/apache/pig/PigServer.java (original)
+++ pig/branches/branch-0.14/src/org/apache/pig/PigServer.java Thu Oct 30 21:15:36 2014
@@ -243,12 +243,24 @@ public class PigServer {
     private void addJarsFromProperties() throws ExecException {
         //add jars from properties to extraJars
         String jar_str = pigContext.getProperties().getProperty("pig.additional.jars");
+        if (jar_str==null) {
+            jar_str = "";
+        }
+        jar_str = jar_str.replaceAll(File.pathSeparator, ",");
+        if (!jar_str.isEmpty()) {
+            jar_str += ",";
+        }
+
+        String jar_str_comma = pigContext.getProperties().getProperty("pig.additional.jars.comma");
+        if (jar_str_comma!=null && !jar_str_comma.isEmpty()) {
+            jar_str = jar_str + jar_str_comma;
+        }
 
         if(jar_str != null){
             // Use File.pathSeparator (":" on Linux, ";" on Windows)
             // to correctly handle path aggregates as they are represented
             // on the Operating System.
-            for(String jar : jar_str.split(File.pathSeparator)){
+            for(String jar : jar_str.split(",")){
                 try {
                     registerJar(jar);
                 } catch (IOException e) {

Modified: pig/branches/branch-0.14/src/org/apache/pig/impl/io/FileLocalizer.java
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.14/src/org/apache/pig/impl/io/FileLocalizer.java?rev=1635615&r1=1635614&r2=1635615&view=diff
==============================================================================
--- pig/branches/branch-0.14/src/org/apache/pig/impl/io/FileLocalizer.java (original)
+++ pig/branches/branch-0.14/src/org/apache/pig/impl/io/FileLocalizer.java Thu Oct 30 21:15:36 2014
@@ -787,6 +787,9 @@ public class FileLocalizer {
                                             boolean multipleFiles) throws IOException {
 
         Path path = new Path(filePath);
+        if (path.getName().isEmpty()) {
+            return new FetchFileRet[0];
+        }
         URI uri = path.toUri();
         Configuration conf = new Configuration();
         ConfigurationUtil.mergeConf(conf, ConfigurationUtil.toConfiguration(properties));