You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by ro...@apache.org on 2014/11/20 20:37:16 UTC
svn commit: r1640793 - in /pig/trunk: CHANGES.txt build.xml
ivy/libraries.properties src/org/apache/pig/impl/util/JarManager.java
test/org/apache/pig/test/TestJobControlCompiler.java
test/org/apache/pig/test/TestPredeployedJar.java
Author: rohini
Date: Thu Nov 20 19:37:15 2014
New Revision: 1640793
URL: http://svn.apache.org/r1640793
Log:
PIG-4332: Remove redundant jars packaged into pig-withouthadoop.jar for hadoop 2 (rohini)
Modified:
pig/trunk/CHANGES.txt
pig/trunk/build.xml
pig/trunk/ivy/libraries.properties
pig/trunk/src/org/apache/pig/impl/util/JarManager.java
pig/trunk/test/org/apache/pig/test/TestJobControlCompiler.java
pig/trunk/test/org/apache/pig/test/TestPredeployedJar.java
Modified: pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1640793&r1=1640792&r2=1640793&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Thu Nov 20 19:37:15 2014
@@ -26,6 +26,8 @@ IMPROVEMENTS
BUG FIXES
+PIG-4332: Remove redundant jars packaged into pig-withouthadoop.jar for hadoop 2 (rohini)
+
PIG-4331: update README, '-x' option in usage to include tez (thejas via daijy)
PIG-4327: Schema of map with value that has an alias can't be parsed again (mprim via daijy)
Modified: pig/trunk/build.xml
URL: http://svn.apache.org/viewvc/pig/trunk/build.xml?rev=1640793&r1=1640792&r2=1640793&view=diff
==============================================================================
--- pig/trunk/build.xml (original)
+++ pig/trunk/build.xml Thu Nov 20 19:37:15 2014
@@ -376,18 +376,14 @@
<include name="antlr-runtime-${antlr.version}.jar"/>
<include name="ST4-${stringtemplate.version}.jar"/>
<include name="jline-${jline.version}.jar"/>
- <include name="jackson-mapper-asl-${jackson.version}.jar"/>
- <include name="jackson-core-asl-${jackson.version}.jar"/>
<include name="joda-time-${joda-time.version}.jar"/>
- <include name="guava-${guava.version}.jar"/>
<include name="automaton-${automaton.version}.jar"/>
<include name="jansi-${jansi.version}.jar"/>
- <include name="avro-${avro.version}.jar"/>
- <include name="avro-mapred-${avro.version}*.jar"/>
- <include name="trevni-core-${avro.version}.jar"/>
- <include name="trevni-avro-${avro.version}.jar"/>
- <include name="snappy-java-${snappy.version}.jar"/>
- <include name="asm*.jar"/>
+ <include name="jackson-mapper-asl-${jackson.version}.jar" unless="isHadoop23"/>
+ <include name="jackson-core-asl-${jackson.version}.jar" unless="isHadoop23"/>
+ <include name="guava-${guava.version}.jar" unless="isHadoop23"/>
+ <include name="snappy-java-${snappy.version}.jar" unless="isHadoop23"/>
+ <include name="asm-${asm.version}.jar" unless="isHadoop23"/>
</patternset>
</fileset>
Modified: pig/trunk/ivy/libraries.properties
URL: http://svn.apache.org/viewvc/pig/trunk/ivy/libraries.properties?rev=1640793&r1=1640792&r2=1640793&view=diff
==============================================================================
--- pig/trunk/ivy/libraries.properties (original)
+++ pig/trunk/ivy/libraries.properties Thu Nov 20 19:37:15 2014
@@ -61,7 +61,7 @@ jettison.version=1.3.4
jetty.version=6.1.26
jetty-util.version=6.1.26
jline.version=1.0
-joda-time.version=2.1
+joda-time.version=2.5
jopt.version=4.1
json-simple.version=1.1
junit.version=4.11
Modified: pig/trunk/src/org/apache/pig/impl/util/JarManager.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/impl/util/JarManager.java?rev=1640793&r1=1640792&r2=1640793&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/impl/util/JarManager.java (original)
+++ pig/trunk/src/org/apache/pig/impl/util/JarManager.java Thu Nov 20 19:37:15 2014
@@ -47,10 +47,9 @@ import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.util.StringUtils;
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigMapReduce;
+import org.apache.pig.backend.hadoop.executionengine.shims.HadoopShims;
import org.apache.pig.impl.PigContext;
import org.apache.tools.bzip2r.BZip2Constants;
-import org.codehaus.jackson.annotate.JsonPropertyOrder;
-import org.codehaus.jackson.map.annotate.JacksonStdImpl;
import org.joda.time.DateTime;
import com.google.common.collect.Multimaps;
@@ -153,7 +152,7 @@ public class JarManager {
/**
* Creates a Classloader based on the passed jarFile and any extra jar files.
- *
+ *
* @param jarFile
* the jar file to be part of the newly created Classloader. This jar file plus any
* jars in the extraJars list will constitute the classpath.
@@ -175,7 +174,7 @@ public class JarManager {
/**
* Adds a stream to a Jar file.
- *
+ *
* @param os
* the OutputStream of the Jar file to which the stream will be added.
* @param name
@@ -209,6 +208,9 @@ public class JarManager {
public static List<String> getDefaultJars() {
List<String> defaultJars = new ArrayList<String>();
for (DefaultPigPackages pkgToSend : DefaultPigPackages.values()) {
+ if(pkgToSend.equals(DefaultPigPackages.GUAVA) && HadoopShims.isHadoopYARN()) {
+ continue; //Skip
+ }
String jar = findContainingJar(pkgToSend.getPkgClass());
if (!defaultJars.contains(jar)) {
defaultJars.add(jar);
@@ -220,7 +222,7 @@ public class JarManager {
/**
* Find a jar that contains a class of the same name, if any. It will return a jar file, even if
* that is not the first thing on the class path that has a class with the same name.
- *
+ *
* @param my_class
* the class to find
* @return a jar file that contains the class, or null
@@ -262,12 +264,12 @@ public class JarManager {
}
return null;
}
-
+
/**
* Add the jars containing the given classes to the job's configuration
* such that JobClient will ship them to the cluster and add them to
* the DistributedCache
- *
+ *
* @param job
* Job object
* @param classes
@@ -285,10 +287,10 @@ public class JarManager {
return;
conf.set("tmpjars", StringUtils.arrayToString(jars.toArray(new String[0])));
}
-
+
/**
- * Add the qualified path name of jars containing the given classes
- *
+ * Add the qualified path name of jars containing the given classes
+ *
* @param fs
* FileSystem object
* @param jars
Modified: pig/trunk/test/org/apache/pig/test/TestJobControlCompiler.java
URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/TestJobControlCompiler.java?rev=1640793&r1=1640792&r2=1640793&view=diff
==============================================================================
--- pig/trunk/test/org/apache/pig/test/TestJobControlCompiler.java (original)
+++ pig/trunk/test/org/apache/pig/test/TestJobControlCompiler.java Thu Nov 20 19:37:15 2014
@@ -130,7 +130,8 @@ public class TestJobControlCompiler {
// verifying the jar gets on distributed cache
Path[] fileClassPaths = DistributedCache.getFileClassPaths(jobConf);
- Assert.assertEquals("size for "+Arrays.toString(fileClassPaths), 6, fileClassPaths.length);
+ // guava jar is not shipped with Hadoop 2.x
+ Assert.assertEquals("size for "+Arrays.toString(fileClassPaths), HadoopShims.isHadoopYARN() ? 5 : 6, fileClassPaths.length);
Path distributedCachePath = fileClassPaths[0];
Assert.assertEquals("ends with jar name: "+distributedCachePath, distributedCachePath.getName(), tmpFile.getName());
// hadoop bug requires path to not contain hdfs://hotname in front
@@ -235,13 +236,14 @@ public class TestJobControlCompiler {
System.out.println("cache.files= " + Arrays.toString(cacheURIs));
System.out.println("classpath.files= " + Arrays.toString(fileClassPaths));
if (HadoopShims.isHadoopYARN()) {
- // Default jars - 5 (pig, antlr, joda-time, guava, automaton)
+ // Default jars - 4 (pig, antlr, joda-time, automaton)
// Other jars - 10 (udf.jar#udf.jar, udf1.jar#diffname.jar, udf2.jar, udf1.jar, another.jar
- Assert.assertEquals("size 10 for " + Arrays.toString(cacheURIs), 10,
+ Assert.assertEquals("size 9 for " + Arrays.toString(cacheURIs), 9,
Arrays.asList(StringUtils.join(cacheURIs, ",").split(",")).size());
- Assert.assertEquals("size 10 for " + Arrays.toString(fileClassPaths), 10,
+ Assert.assertEquals("size 9 for " + Arrays.toString(fileClassPaths), 9,
Arrays.asList(StringUtils.join(fileClassPaths, ",").split(",")).size());
} else {
+ // Default jars - 5. Has guava in addition
// There will be same entries duplicated for udf.jar and udf2.jar
Assert.assertEquals("size 12 for " + Arrays.toString(cacheURIs), 12,
Arrays.asList(StringUtils.join(cacheURIs, ",").split(",")).size());
@@ -257,7 +259,11 @@ public class TestJobControlCompiler {
val = (val == null) ? 1 : ++val;
occurrences.put(cacheURI.toString(), val);
}
- Assert.assertEquals(10, occurrences.size());
+ if (HadoopShims.isHadoopYARN()) {
+ Assert.assertEquals(9, occurrences.size());
+ } else {
+ Assert.assertEquals(10, occurrences.size()); //guava jar in addition
+ }
for (String file : occurrences.keySet()) {
if (!HadoopShims.isHadoopYARN() && (file.endsWith("udf.jar") || file.endsWith("udf2.jar"))) {
Modified: pig/trunk/test/org/apache/pig/test/TestPredeployedJar.java
URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/TestPredeployedJar.java?rev=1640793&r1=1640792&r2=1640793&view=diff
==============================================================================
--- pig/trunk/test/org/apache/pig/test/TestPredeployedJar.java (original)
+++ pig/trunk/test/org/apache/pig/test/TestPredeployedJar.java Thu Nov 20 19:37:15 2014
@@ -38,8 +38,8 @@ import org.junit.Assert;
import org.junit.Test;
/**
- * Ensure that jars marked as predeployed are not included in the generated
- * job jar.
+ * Ensure that jars marked as predeployed are not included in the generated
+ * job jar.
*/
public class TestPredeployedJar {
static MiniGenericCluster cluster = MiniGenericCluster.buildCluster();
@@ -52,44 +52,44 @@ public class TestPredeployedJar {
File logFile = File.createTempFile("log", "");
FileAppender appender = new FileAppender(layout, logFile.toString(), false, false, 0);
logger.addAppender(appender);
-
+
PigServer pigServer = new PigServer(ExecType.MAPREDUCE, cluster.getConfiguration());
pigServer.getPigContext().getProperties().put(PigConfiguration.PIG_OPT_FETCH, "false");
String[] inputData = new String[] { "hello", "world" };
Util.createInputFile(cluster, "a.txt", inputData);
- String guavaJar = JarManager.findContainingJar(com.google.common.collect.Multimaps.class);
+ String jodaTimeJar = JarManager.findContainingJar(org.joda.time.DateTime.class);
pigServer.registerQuery("a = load 'a.txt' as (line:chararray);");
Iterator<Tuple> it = pigServer.openIterator("a");
String content = FileUtils.readFileToString(logFile);
- Assert.assertTrue(content.contains(guavaJar));
-
+ Assert.assertTrue(content.contains(jodaTimeJar));
+
logFile = File.createTempFile("log", "");
-
+
// Now let's mark the guava jar as predeployed.
- pigServer.getPigContext().markJarAsPredeployed(guavaJar);
+ pigServer.getPigContext().markJarAsPredeployed(jodaTimeJar);
it = pigServer.openIterator("a");
content = FileUtils.readFileToString(logFile);
- Assert.assertFalse(content.contains(guavaJar));
+ Assert.assertFalse(content.contains(jodaTimeJar));
}
-
+
@Test
public void testPredeployedJarsProperty() throws ExecException {
Properties p = new Properties();
p.setProperty("pig.predeployed.jars", "zzz");
PigServer pigServer = new PigServer(ExecType.LOCAL, p);
-
+
Assert.assertTrue(pigServer.getPigContext().predeployedJars.contains("zzz"));
-
+
p = new Properties();
p.setProperty("pig.predeployed.jars", "aaa" + File.pathSeparator + "bbb");
pigServer = new PigServer(ExecType.LOCAL, p);
-
+
Assert.assertTrue(pigServer.getPigContext().predeployedJars.contains("aaa"));
Assert.assertTrue(pigServer.getPigContext().predeployedJars.contains("bbb"));
-
+
Assert.assertFalse(pigServer.getPigContext().predeployedJars.contains("zzz"));
}
}