You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by ro...@apache.org on 2014/11/20 20:37:16 UTC

svn commit: r1640793 - in /pig/trunk: CHANGES.txt build.xml ivy/libraries.properties src/org/apache/pig/impl/util/JarManager.java test/org/apache/pig/test/TestJobControlCompiler.java test/org/apache/pig/test/TestPredeployedJar.java

Author: rohini
Date: Thu Nov 20 19:37:15 2014
New Revision: 1640793

URL: http://svn.apache.org/r1640793
Log:
PIG-4332: Remove redundant jars packaged into pig-withouthadoop.jar for hadoop 2 (rohini)

Modified:
    pig/trunk/CHANGES.txt
    pig/trunk/build.xml
    pig/trunk/ivy/libraries.properties
    pig/trunk/src/org/apache/pig/impl/util/JarManager.java
    pig/trunk/test/org/apache/pig/test/TestJobControlCompiler.java
    pig/trunk/test/org/apache/pig/test/TestPredeployedJar.java

Modified: pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1640793&r1=1640792&r2=1640793&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Thu Nov 20 19:37:15 2014
@@ -26,6 +26,8 @@ IMPROVEMENTS
  
 BUG FIXES
 
+PIG-4332: Remove redundant jars packaged into pig-withouthadoop.jar for hadoop 2 (rohini)
+
 PIG-4331: update README, '-x' option in usage to include tez (thejas via daijy)
 
 PIG-4327: Schema of map with value that has an alias can't be parsed again (mprim via daijy)

Modified: pig/trunk/build.xml
URL: http://svn.apache.org/viewvc/pig/trunk/build.xml?rev=1640793&r1=1640792&r2=1640793&view=diff
==============================================================================
--- pig/trunk/build.xml (original)
+++ pig/trunk/build.xml Thu Nov 20 19:37:15 2014
@@ -376,18 +376,14 @@
             <include name="antlr-runtime-${antlr.version}.jar"/>
             <include name="ST4-${stringtemplate.version}.jar"/>
             <include name="jline-${jline.version}.jar"/>
-            <include name="jackson-mapper-asl-${jackson.version}.jar"/>
-            <include name="jackson-core-asl-${jackson.version}.jar"/>
             <include name="joda-time-${joda-time.version}.jar"/>
-            <include name="guava-${guava.version}.jar"/>
             <include name="automaton-${automaton.version}.jar"/>
             <include name="jansi-${jansi.version}.jar"/>
-            <include name="avro-${avro.version}.jar"/>
-            <include name="avro-mapred-${avro.version}*.jar"/>
-            <include name="trevni-core-${avro.version}.jar"/>
-            <include name="trevni-avro-${avro.version}.jar"/>
-            <include name="snappy-java-${snappy.version}.jar"/>
-            <include name="asm*.jar"/>
+            <include name="jackson-mapper-asl-${jackson.version}.jar" unless="isHadoop23"/>
+            <include name="jackson-core-asl-${jackson.version}.jar" unless="isHadoop23"/>
+            <include name="guava-${guava.version}.jar" unless="isHadoop23"/>
+            <include name="snappy-java-${snappy.version}.jar" unless="isHadoop23"/>
+            <include name="asm-${asm.version}.jar" unless="isHadoop23"/>
         </patternset>
     </fileset>
 

Modified: pig/trunk/ivy/libraries.properties
URL: http://svn.apache.org/viewvc/pig/trunk/ivy/libraries.properties?rev=1640793&r1=1640792&r2=1640793&view=diff
==============================================================================
--- pig/trunk/ivy/libraries.properties (original)
+++ pig/trunk/ivy/libraries.properties Thu Nov 20 19:37:15 2014
@@ -61,7 +61,7 @@ jettison.version=1.3.4
 jetty.version=6.1.26
 jetty-util.version=6.1.26
 jline.version=1.0
-joda-time.version=2.1
+joda-time.version=2.5
 jopt.version=4.1
 json-simple.version=1.1
 junit.version=4.11

Modified: pig/trunk/src/org/apache/pig/impl/util/JarManager.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/impl/util/JarManager.java?rev=1640793&r1=1640792&r2=1640793&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/impl/util/JarManager.java (original)
+++ pig/trunk/src/org/apache/pig/impl/util/JarManager.java Thu Nov 20 19:37:15 2014
@@ -47,10 +47,9 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.mapreduce.Job;
 import org.apache.hadoop.util.StringUtils;
 import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigMapReduce;
+import org.apache.pig.backend.hadoop.executionengine.shims.HadoopShims;
 import org.apache.pig.impl.PigContext;
 import org.apache.tools.bzip2r.BZip2Constants;
-import org.codehaus.jackson.annotate.JsonPropertyOrder;
-import org.codehaus.jackson.map.annotate.JacksonStdImpl;
 import org.joda.time.DateTime;
 
 import com.google.common.collect.Multimaps;
@@ -153,7 +152,7 @@ public class JarManager {
 
     /**
      * Creates a Classloader based on the passed jarFile and any extra jar files.
-     * 
+     *
      * @param jarFile
      *            the jar file to be part of the newly created Classloader. This jar file plus any
      *            jars in the extraJars list will constitute the classpath.
@@ -175,7 +174,7 @@ public class JarManager {
 
      /**
      * Adds a stream to a Jar file.
-     * 
+     *
      * @param os
      *            the OutputStream of the Jar file to which the stream will be added.
      * @param name
@@ -209,6 +208,9 @@ public class JarManager {
     public static List<String> getDefaultJars() {
         List<String> defaultJars = new ArrayList<String>();
         for (DefaultPigPackages pkgToSend : DefaultPigPackages.values()) {
+            if(pkgToSend.equals(DefaultPigPackages.GUAVA) && HadoopShims.isHadoopYARN()) {
+                continue; //Skip
+            }
             String jar = findContainingJar(pkgToSend.getPkgClass());
             if (!defaultJars.contains(jar)) {
                 defaultJars.add(jar);
@@ -220,7 +222,7 @@ public class JarManager {
     /**
      * Find a jar that contains a class of the same name, if any. It will return a jar file, even if
      * that is not the first thing on the class path that has a class with the same name.
-     * 
+     *
      * @param my_class
      *            the class to find
      * @return a jar file that contains the class, or null
@@ -262,12 +264,12 @@ public class JarManager {
         }
         return null;
     }
-    
+
     /**
      * Add the jars containing the given classes to the job's configuration
      * such that JobClient will ship them to the cluster and add them to
      * the DistributedCache
-     * 
+     *
      * @param job
      *           Job object
      * @param classes
@@ -285,10 +287,10 @@ public class JarManager {
             return;
         conf.set("tmpjars", StringUtils.arrayToString(jars.toArray(new String[0])));
     }
-    
+
     /**
-     * Add the qualified path name of jars containing the given classes 
-     * 
+     * Add the qualified path name of jars containing the given classes
+     *
      * @param fs
      *            FileSystem object
      * @param jars

Modified: pig/trunk/test/org/apache/pig/test/TestJobControlCompiler.java
URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/TestJobControlCompiler.java?rev=1640793&r1=1640792&r2=1640793&view=diff
==============================================================================
--- pig/trunk/test/org/apache/pig/test/TestJobControlCompiler.java (original)
+++ pig/trunk/test/org/apache/pig/test/TestJobControlCompiler.java Thu Nov 20 19:37:15 2014
@@ -130,7 +130,8 @@ public class TestJobControlCompiler {
 
     // verifying the jar gets on distributed cache
     Path[] fileClassPaths = DistributedCache.getFileClassPaths(jobConf);
-    Assert.assertEquals("size for "+Arrays.toString(fileClassPaths), 6, fileClassPaths.length);
+    // guava jar is not shipped with Hadoop 2.x
+    Assert.assertEquals("size for "+Arrays.toString(fileClassPaths), HadoopShims.isHadoopYARN() ? 5 : 6, fileClassPaths.length);
     Path distributedCachePath = fileClassPaths[0];
     Assert.assertEquals("ends with jar name: "+distributedCachePath, distributedCachePath.getName(), tmpFile.getName());
-    // hadoop bug requires path to not contain hdfs://hotname in front
+    // hadoop bug requires path to not contain hdfs://hostname in front
@@ -235,13 +236,14 @@ public class TestJobControlCompiler {
           System.out.println("cache.files= " + Arrays.toString(cacheURIs));
           System.out.println("classpath.files= " + Arrays.toString(fileClassPaths));
           if (HadoopShims.isHadoopYARN()) {
-              // Default jars - 5 (pig, antlr, joda-time, guava, automaton)
-              // Other jars - 10 (udf.jar#udf.jar, udf1.jar#diffname.jar, udf2.jar, udf1.jar, another.jar
+              // Default jars - 4 (pig, antlr, joda-time, automaton)
+              // Other jars - 5 (udf.jar#udf.jar, udf1.jar#diffname.jar, udf2.jar, udf1.jar, another.jar)
-              Assert.assertEquals("size 10 for " + Arrays.toString(cacheURIs), 10,
+              Assert.assertEquals("size 9 for " + Arrays.toString(cacheURIs), 9,
                       Arrays.asList(StringUtils.join(cacheURIs, ",").split(",")).size());
-              Assert.assertEquals("size 10 for " + Arrays.toString(fileClassPaths), 10,
+              Assert.assertEquals("size 9 for " + Arrays.toString(fileClassPaths), 9,
                       Arrays.asList(StringUtils.join(fileClassPaths, ",").split(",")).size());
           } else {
+              // Default jars - 5. Has guava in addition
               // There will be same entries duplicated for udf.jar and udf2.jar
               Assert.assertEquals("size 12 for " + Arrays.toString(cacheURIs), 12,
                       Arrays.asList(StringUtils.join(cacheURIs, ",").split(",")).size());
@@ -257,7 +259,11 @@ public class TestJobControlCompiler {
               val = (val == null) ? 1 : ++val;
               occurrences.put(cacheURI.toString(), val);
           }
-          Assert.assertEquals(10, occurrences.size());
+          if (HadoopShims.isHadoopYARN()) {
+              Assert.assertEquals(9, occurrences.size());
+          } else {
+              Assert.assertEquals(10, occurrences.size()); //guava jar in addition
+          }
 
           for (String file : occurrences.keySet()) {
               if (!HadoopShims.isHadoopYARN() && (file.endsWith("udf.jar") || file.endsWith("udf2.jar"))) {

Modified: pig/trunk/test/org/apache/pig/test/TestPredeployedJar.java
URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/TestPredeployedJar.java?rev=1640793&r1=1640792&r2=1640793&view=diff
==============================================================================
--- pig/trunk/test/org/apache/pig/test/TestPredeployedJar.java (original)
+++ pig/trunk/test/org/apache/pig/test/TestPredeployedJar.java Thu Nov 20 19:37:15 2014
@@ -38,8 +38,8 @@ import org.junit.Assert;
 import org.junit.Test;
 
 /**
- * Ensure that jars marked as predeployed are not included in the generated 
- * job jar. 
+ * Ensure that jars marked as predeployed are not included in the generated
+ * job jar.
  */
 public class TestPredeployedJar {
     static MiniGenericCluster cluster = MiniGenericCluster.buildCluster();
@@ -52,44 +52,44 @@ public class TestPredeployedJar {
         File logFile = File.createTempFile("log", "");
         FileAppender appender = new FileAppender(layout, logFile.toString(), false, false, 0);
         logger.addAppender(appender);
-        
+
         PigServer pigServer = new PigServer(ExecType.MAPREDUCE, cluster.getConfiguration());
         pigServer.getPigContext().getProperties().put(PigConfiguration.PIG_OPT_FETCH, "false");
         String[] inputData = new String[] { "hello", "world" };
         Util.createInputFile(cluster, "a.txt", inputData);
-        String guavaJar = JarManager.findContainingJar(com.google.common.collect.Multimaps.class);
+        String jodaTimeJar = JarManager.findContainingJar(org.joda.time.DateTime.class);
 
         pigServer.registerQuery("a = load 'a.txt' as (line:chararray);");
         Iterator<Tuple> it = pigServer.openIterator("a");
 
         String content = FileUtils.readFileToString(logFile);
-        Assert.assertTrue(content.contains(guavaJar));
-        
+        Assert.assertTrue(content.contains(jodaTimeJar));
+
         logFile = File.createTempFile("log", "");
-        
+
-        // Now let's mark the guava jar as predeployed.
+        // Now let's mark the joda-time jar as predeployed.
-        pigServer.getPigContext().markJarAsPredeployed(guavaJar);
+        pigServer.getPigContext().markJarAsPredeployed(jodaTimeJar);
         it = pigServer.openIterator("a");
 
         content = FileUtils.readFileToString(logFile);
-        Assert.assertFalse(content.contains(guavaJar));
+        Assert.assertFalse(content.contains(jodaTimeJar));
     }
-    
+
     @Test
     public void testPredeployedJarsProperty() throws ExecException {
         Properties p = new Properties();
         p.setProperty("pig.predeployed.jars", "zzz");
         PigServer pigServer = new PigServer(ExecType.LOCAL, p);
-        
+
         Assert.assertTrue(pigServer.getPigContext().predeployedJars.contains("zzz"));
-        
+
         p = new Properties();
         p.setProperty("pig.predeployed.jars", "aaa" + File.pathSeparator + "bbb");
         pigServer = new PigServer(ExecType.LOCAL, p);
-        
+
         Assert.assertTrue(pigServer.getPigContext().predeployedJars.contains("aaa"));
         Assert.assertTrue(pigServer.getPigContext().predeployedJars.contains("bbb"));
-        
+
         Assert.assertFalse(pigServer.getPigContext().predeployedJars.contains("zzz"));
     }
 }