You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by da...@apache.org on 2014/01/09 20:35:08 UTC

svn commit: r1556931 - in /pig/trunk: CHANGES.txt src/org/apache/pig/PigServer.java src/org/apache/pig/impl/PigContext.java src/org/apache/pig/impl/util/JarManager.java

Author: daijy
Date: Thu Jan  9 19:35:08 2014
New Revision: 1556931

URL: http://svn.apache.org/r1556931
Log:
PIG-3653: Add support for pre-deployed jars

Modified:
    pig/trunk/CHANGES.txt
    pig/trunk/src/org/apache/pig/PigServer.java
    pig/trunk/src/org/apache/pig/impl/PigContext.java
    pig/trunk/src/org/apache/pig/impl/util/JarManager.java

Modified: pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1556931&r1=1556930&r2=1556931&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Thu Jan  9 19:35:08 2014
@@ -28,6 +28,8 @@ PIG-3419: Pluggable Execution Engine (ac
 
 IMPROVEMENTS
 
+PIG-3653: Add support for pre-deployed jars (tmwoodruff via daijy)
+
 PIG-3645: Move FileLocalizer.setR() calls to unit tests (cheolsoo)
 
 PIG-3637: PigCombiner creating log spam (rohini)

Modified: pig/trunk/src/org/apache/pig/PigServer.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/PigServer.java?rev=1556931&r1=1556930&r2=1556931&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/PigServer.java (original)
+++ pig/trunk/src/org/apache/pig/PigServer.java Thu Jan  9 19:35:08 2014
@@ -229,6 +229,7 @@ public class PigServer {
         }
 
         addJarsFromProperties();
+        markPredeployedJarsFromProperties();
 
         if (PigStats.get() == null) {
             PigStats.start(pigContext.getExecutionEngine().instantiatePigStats());
@@ -265,6 +266,22 @@ public class PigServer {
         }
     }
 
+    private void markPredeployedJarsFromProperties() throws ExecException {
+        // mark jars as predeployed from properties
+        String jar_str = pigContext.getProperties().getProperty("pig.predeployed.jars");
+		
+        if(jar_str != null){
+            // Use File.pathSeparator (":" on Linux, ";" on Windows)
+            // to correctly handle path aggregates as they are represented
+            // on the Operating System.
+            for(String jar : jar_str.split(File.pathSeparator)){
+                if (jar.length() > 0) {
+                    pigContext.markJarAsPredeployed(jar);
+                }
+            }
+        }
+    }
+
     public PigContext getPigContext(){
         return pigContext;
     }

Modified: pig/trunk/src/org/apache/pig/impl/PigContext.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/impl/PigContext.java?rev=1556931&r1=1556930&r2=1556931&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/impl/PigContext.java (original)
+++ pig/trunk/src/org/apache/pig/impl/PigContext.java Thu Jan  9 19:35:08 2014
@@ -23,9 +23,9 @@ import java.io.File;
 import java.io.FileNotFoundException;
 import java.io.FileReader;
 import java.io.FileWriter;
+import java.io.IOException;
 import java.io.InputStream;
 import java.io.InputStreamReader;
-import java.io.IOException;
 import java.io.Serializable;
 import java.io.StringWriter;
 import java.lang.reflect.Constructor;
@@ -118,6 +118,9 @@ public class PigContext implements Seria
     // (some functions may come from pig.jar and we don't want the whole jar file.)
     transient public Vector<String> skipJars = new Vector<String>(2);
 
+    // jars that are predeployed to the cluster and thus should not be merged in at all (even subsets).
+    transient public Vector<String> predeployedJars = new Vector<String>(2);
+    
     // script files that are needed to run a job
     @Deprecated
     public List<String> scriptFiles = new ArrayList<String>();
@@ -355,6 +358,17 @@ public class PigContext implements Seria
             Thread.currentThread().setContextClassLoader(PigContext.classloader);
         }
     }
+    
+    /**
+     * Adds the specified path to the predeployed jars list. These jars will 
+     * never be included in the generated job jar.
+     * <p>
+     * This can be called for jars that are pre-installed on the Hadoop 
+     * cluster to reduce the size of the job jar.
+     */
+    public void markJarAsPredeployed(String path) {
+        predeployedJars.add(path);
+    }
 
     public String doParamSubstitution(InputStream in,
                                       List<String> params,

Modified: pig/trunk/src/org/apache/pig/impl/util/JarManager.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/impl/util/JarManager.java?rev=1556931&r1=1556930&r2=1556931&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/impl/util/JarManager.java (original)
+++ pig/trunk/src/org/apache/pig/impl/util/JarManager.java Thu Jan  9 19:35:08 2014
@@ -306,6 +306,8 @@ public class JarManager {
      */
     private static void addContainingJar(Vector<JarListEntry> jarList, Class clazz, String prefix, PigContext pigContext) {
         String jar = findContainingJar(clazz);
+        if (pigContext.predeployedJars.contains(jar))
+            return;
         if (pigContext.skipJars.contains(jar) && prefix == null)
             return;
         if (jar == null)