You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by da...@apache.org on 2014/10/30 22:15:36 UTC
svn commit: r1635615 - in /pig/branches/branch-0.14: CHANGES.txt bin/pig
bin/pig.cmd src/docs/src/documentation/content/xdocs/basic.xml
src/org/apache/pig/Main.java src/org/apache/pig/PigServer.java
src/org/apache/pig/impl/io/FileLocalizer.java
Author: daijy
Date: Thu Oct 30 21:15:36 2014
New Revision: 1635615
URL: http://svn.apache.org/r1635615
Log:
PIG-4160: Provide a way to pass local jars in pig.additional.jars when using a remote url for a script
Modified:
pig/branches/branch-0.14/CHANGES.txt
pig/branches/branch-0.14/bin/pig
pig/branches/branch-0.14/bin/pig.cmd
pig/branches/branch-0.14/src/docs/src/documentation/content/xdocs/basic.xml
pig/branches/branch-0.14/src/org/apache/pig/Main.java
pig/branches/branch-0.14/src/org/apache/pig/PigServer.java
pig/branches/branch-0.14/src/org/apache/pig/impl/io/FileLocalizer.java
Modified: pig/branches/branch-0.14/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.14/CHANGES.txt?rev=1635615&r1=1635614&r2=1635615&view=diff
==============================================================================
--- pig/branches/branch-0.14/CHANGES.txt (original)
+++ pig/branches/branch-0.14/CHANGES.txt Thu Oct 30 21:15:36 2014
@@ -24,6 +24,9 @@ INCOMPATIBLE CHANGES
IMPROVEMENTS
+PIG-4160: Provide a way to pass local jars in pig.additional.jars when using a remote
+ url for a script (acoliver via daijy)
+
PIG-4246: HBaseStorage should implement getShipFiles (rohini)
PIG-3456: Reduce threadlocal conf access in backend for each record (rohini)
Modified: pig/branches/branch-0.14/bin/pig
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.14/bin/pig?rev=1635615&r1=1635614&r2=1635615&view=diff
==============================================================================
--- pig/branches/branch-0.14/bin/pig (original)
+++ pig/branches/branch-0.14/bin/pig Thu Oct 30 21:15:36 2014
@@ -55,7 +55,7 @@ debug=false
remaining=()
includeHCatalog="";
-addJarString=-Dpig.additional.jars\=;
+addJarString=-Dpig.additional.jars.comma\=;
additionalJars="";
# filter command line parameter
for f in "$@"; do
@@ -187,11 +187,12 @@ if [ "$includeHCatalog" == "true" ]; the
# get the pig storage handler jar
pigHCatJarPath=`ls $HCAT_HOME/share/hcatalog/${pigHCatJar}`
- ADDITIONAL_CLASSPATHS=$hiveMetaStoreVersion:$thriftVersion:$hiveExecVersion:$fbJarVersion:$jdoECJarVersion:$slfJarVersion:$hbaseHiveVersion:$hcatJarPath:$hbaseHCatJarPath:$pigHCatJarPath
+ HCAT_CLASSPATHS=$hiveMetaStoreVersion:$thriftVersion:$hiveExecVersion:$fbJarVersion:$jdoECJarVersion:$slfJarVersion:$hbaseHiveVersion:$hcatJarPath:$hbaseHCatJarPath:$pigHCatJarPath
+ ADDITIONAL_CLASSPATHS=file://$hiveMetaStoreVersion,file://$thriftVersion,file://$hiveExecVersion,file://$fbJarVersion,file://$jdoECJarVersion,file://$slfJarVersion,file://$hbaseHiveVersion,file://$hcatJarPath,file://$hbaseHCatJarPath,file://$pigHCatJarPath
if [ "$additionalJars" != "" ]; then
ADDITIONAL_CLASSPATHS=$ADDITIONAL_CLASSPATHS:$additionalJars
fi
- CLASSPATH=${CLASSPATH}:$ADDITIONAL_CLASSPATHS:$HIVE_HOME/conf
+ CLASSPATH=${CLASSPATH}:$HCAT_CLASSPATHS:$HIVE_HOME/conf
fi
# Add user-specified CLASSPATH entries via PIG_CLASSPATH
@@ -352,12 +353,12 @@ PIG_OPTS="$PIG_OPTS -Dpig.log.dir=$PIG_L
PIG_OPTS="$PIG_OPTS -Dpig.log.file=$PIG_LOGFILE"
PIG_OPTS="$PIG_OPTS -Dpig.home.dir=$PIG_HOME"
if [ "$includeHCatalog" == "true" ]; then
- addJars=`echo $PIG_OPTS | awk '{ for (i=1; i<=NF; i++) print $i; }' | grep "\-Dpig.additional.jars=" | sed s/-Dpig.additional.jars=//`
+ addJars=`echo $PIG_OPTS | awk '{ for (i=1; i<=NF; i++) print $i; }' | grep "\-Dpig.additional.jars.comma=" | sed s/-Dpig.additional.jars.comma=//`
if [ "$addJars" != "" ]; then
ADDITIONAL_CLASSPATHS=$addJars:$ADDITIONAL_CLASSPATHS
- PIG_OPTS=`echo $PIG_OPTS | sed 's/-Dpig.additional.jars=[^ ]*//'`
+ PIG_OPTS=`echo $PIG_OPTS | sed 's/-Dpig.additional.jars.comma=[^ ]*//'`
fi
- PIG_OPTS="$PIG_OPTS -Dpig.additional.jars=$ADDITIONAL_CLASSPATHS"
+ PIG_OPTS="$PIG_OPTS -Dpig.additional.jars.comma=$ADDITIONAL_CLASSPATHS"
fi
# run it
@@ -389,7 +390,6 @@ if [ -n "$HADOOP_BIN" ]; then
done
export HADOOP_CLASSPATH=$CLASSPATH:$HADOOP_CLASSPATH
- export HADOOP_OPTS="$JAVA_HEAP_MAX $PIG_OPTS $HADOOP_OPTS"
export HADOOP_CLIENT_OPTS="$JAVA_HEAP_MAX $PIG_OPTS $HADOOP_CLIENT_OPTS"
if [ "$debug" == "true" ]; then
echo "dry run:"
Modified: pig/branches/branch-0.14/bin/pig.cmd
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.14/bin/pig.cmd?rev=1635615&r1=1635614&r2=1635615&view=diff
==============================================================================
--- pig/branches/branch-0.14/bin/pig.cmd (original)
+++ pig/branches/branch-0.14/bin/pig.cmd Thu Oct 30 21:15:36 2014
@@ -105,6 +105,7 @@ set PIGARGS=
)
set HCAT_DEPENDCIES=
+ set HCAT_CLASSPATH=
if not defined HCAT_FLAG (
goto HCAT_END
)
@@ -149,8 +150,8 @@ set PIGARGS=
echo "HIVE_HOME should be defined"
exit /b 1
)
- set PIG_CLASSPATH=%PIG_CLASSPATH%;%HCAT_DEPENDCIES%;%HIVE_HOME%\conf
- set PIG_OPTS=%PIG_OPTS% -Dpig.additional.jars=%HCAT_DEPENDCIES%;%PIG_ADDITIONAL_JARS%
+ set PIG_CLASSPATH=%PIG_CLASSPATH%;%HCAT_CLASSPATH%;%HIVE_HOME%\conf
+ set PIG_OPTS=%PIG_OPTS% -Dpig.additional.jars.comma=%HCAT_DEPENDCIES%,%PIG_ADDITIONAL_JARS_COMMA%
:HCAT_END
if defined PIG_CLASSPATH (
@@ -197,7 +198,8 @@ set PIGARGS=
:AddJar
pushd %1
for /f %%a IN ('dir /b %2') do (
- set HCAT_DEPENDCIES=!HCAT_DEPENDCIES!;%1\%%a
+ set HCAT_CLASSPATH=!HCAT_CLASSPATH!;%1\%%a
+ set HCAT_DEPENDCIES=!HCAT_DEPENDCIES!,file:///%1\%%a
)
popd
:endlocal
Modified: pig/branches/branch-0.14/src/docs/src/documentation/content/xdocs/basic.xml
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.14/src/docs/src/documentation/content/xdocs/basic.xml?rev=1635615&r1=1635614&r2=1635615&view=diff
==============================================================================
--- pig/branches/branch-0.14/src/docs/src/documentation/content/xdocs/basic.xml (original)
+++ pig/branches/branch-0.14/src/docs/src/documentation/content/xdocs/basic.xml Thu Oct 30 21:15:36 2014
@@ -8960,7 +8960,7 @@ B = FOREACH A GENERATE myFunc($0);
<p id="register-glob">Additionally, JAR files stored in local file systems can be specified as a glob pattern using "*". Pig will search for matching jars in the local file system, either the relative path (relative to your working directory) or the absolute path. Pig will pick up all JARs that match the glob.</p>
<p><strong>Command Line</strong></p>
- <p>You can register additional files (to use with your Pig script) via the command line using the -Dpig.additional.jars option.
+ <p>You can register additional files (to use with your Pig script) via the PIG_OPTS environment variable using the -Dpig.additional.jars.comma option.
For more information see <a href="udf.html">User Defined Functions</a>.</p>
</section>
@@ -8975,16 +8975,18 @@ A = LOAD 'students';
B = FOREACH A GENERATE myfunc.MyEvalFunc($0);
</source>
-<p>In this example additional JAR files are registered via the command line.</p>
+<p>In this example additional JAR files are registered via the PIG_OPTS environment variable.</p>
<source>
-pig -Dpig.additional.jars=my.jar:your.jar script.pig
+export PIG_OPTS="-Dpig.additional.jars.comma=my.jar,your.jar"
</source>
-<p>In this example a JAR file stored in HDFS is registered.</p>
+<p>In this example a JAR file stored in HDFS and a local JAR file are registered.</p>
<source>
-pig -Dpig.additional.jars=hdfs://nn.mydomain.com:9020/myjars/my.jar script.pig
+export PIG_OPTS="-Dpig.additional.jars.comma=hdfs://nn.mydomain.com:9020/myjars/my.jar,file:///home/root/pig/your.jar"
</source>
+<p>Note: the legacy property pig.additional.jars, which uses a colon as the separator, is still supported. However, we recommend using pig.additional.jars.comma instead: because a colon also appears in URL schemes, a colon-separated list cannot contain fully qualified URLs. We will deprecate pig.additional.jars in future releases.</p>
+
<p>This example shows how to specify a glob pattern using either a relative path or an absolute path.</p>
<source>
register /homes/user/pig/myfunc*.jar
Modified: pig/branches/branch-0.14/src/org/apache/pig/Main.java
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.14/src/org/apache/pig/Main.java?rev=1635615&r1=1635614&r2=1635615&view=diff
==============================================================================
--- pig/branches/branch-0.14/src/org/apache/pig/Main.java (original)
+++ pig/branches/branch-0.14/src/org/apache/pig/Main.java Thu Oct 30 21:15:36 2014
@@ -937,7 +937,7 @@ public class Main {
System.out.println(" by this factor, it gets disabled.");
System.out.println(" Miscellaneous:");
System.out.println(" exectype=mapreduce|local; default is mapreduce. This property is the same as -x switch");
- System.out.println(" pig.additional.jars=<colon seperated list of jars>. Used in place of register command.");
+ System.out.println(" pig.additional.jars.comma=<comma seperated list of jars>. Used in place of register command.");
System.out.println(" udf.import.list=<comma seperated list of imports>. Used to avoid package names in UDF.");
System.out.println(" stop.on.failure=true|false; default is false. Set to true to terminate on the first error.");
System.out.println(" pig.datetime.default.tz=<UTC time offset>. e.g. +08:00. Default is the default timezone of the host.");
Modified: pig/branches/branch-0.14/src/org/apache/pig/PigServer.java
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.14/src/org/apache/pig/PigServer.java?rev=1635615&r1=1635614&r2=1635615&view=diff
==============================================================================
--- pig/branches/branch-0.14/src/org/apache/pig/PigServer.java (original)
+++ pig/branches/branch-0.14/src/org/apache/pig/PigServer.java Thu Oct 30 21:15:36 2014
@@ -243,12 +243,24 @@ public class PigServer {
private void addJarsFromProperties() throws ExecException {
//add jars from properties to extraJars
String jar_str = pigContext.getProperties().getProperty("pig.additional.jars");
+ if (jar_str==null) {
+ jar_str = "";
+ }
+ jar_str = jar_str.replaceAll(File.pathSeparator, ",");
+ if (!jar_str.isEmpty()) {
+ jar_str += ",";
+ }
+
+ String jar_str_comma = pigContext.getProperties().getProperty("pig.additional.jars.comma");
+ if (jar_str_comma!=null && !jar_str_comma.isEmpty()) {
+ jar_str = jar_str + jar_str_comma;
+ }
if(jar_str != null){
// Use File.pathSeparator (":" on Linux, ";" on Windows)
// to correctly handle path aggregates as they are represented
// on the Operating System.
- for(String jar : jar_str.split(File.pathSeparator)){
+ for(String jar : jar_str.split(",")){
try {
registerJar(jar);
} catch (IOException e) {
Modified: pig/branches/branch-0.14/src/org/apache/pig/impl/io/FileLocalizer.java
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.14/src/org/apache/pig/impl/io/FileLocalizer.java?rev=1635615&r1=1635614&r2=1635615&view=diff
==============================================================================
--- pig/branches/branch-0.14/src/org/apache/pig/impl/io/FileLocalizer.java (original)
+++ pig/branches/branch-0.14/src/org/apache/pig/impl/io/FileLocalizer.java Thu Oct 30 21:15:36 2014
@@ -787,6 +787,9 @@ public class FileLocalizer {
boolean multipleFiles) throws IOException {
Path path = new Path(filePath);
+ if (path.getName().isEmpty()) {
+ return new FetchFileRet[0];
+ }
URI uri = path.toUri();
Configuration conf = new Configuration();
ConfigurationUtil.mergeConf(conf, ConfigurationUtil.toConfiguration(properties));