You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by da...@apache.org on 2012/02/16 20:59:04 UTC

svn commit: r1245145 - in /pig/branches/branch-0.10: CHANGES.txt contrib/zebra/build.xml src/org/apache/pig/backend/hadoop/executionengine/HExecutionEngine.java src/org/apache/pig/tools/grunt/GruntParser.java

Author: daijy
Date: Thu Feb 16 19:59:03 2012
New Revision: 1245145

URL: http://svn.apache.org/viewvc?rev=1245145&view=rev
Log:
PIG-2508: PIG can unpredictably ignore deprecated Hadoop config options

Modified:
    pig/branches/branch-0.10/CHANGES.txt
    pig/branches/branch-0.10/contrib/zebra/build.xml
    pig/branches/branch-0.10/src/org/apache/pig/backend/hadoop/executionengine/HExecutionEngine.java
    pig/branches/branch-0.10/src/org/apache/pig/tools/grunt/GruntParser.java

Modified: pig/branches/branch-0.10/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.10/CHANGES.txt?rev=1245145&r1=1245144&r2=1245145&view=diff
==============================================================================
--- pig/branches/branch-0.10/CHANGES.txt (original)
+++ pig/branches/branch-0.10/CHANGES.txt Thu Feb 16 19:59:03 2012
@@ -326,6 +326,8 @@ Release 0.9.3 - Unreleased
 
 BUG FIXES
 
+PIG-2508: PIG can unpredictably ignore deprecated Hadoop config options (daijy)
+
 PIG-2493: UNION causes casting issues (vivekp via daijy)
 
 PIG-2497: Order of execution of fs, store and sh commands in Pig is not maintained (daijy)

Modified: pig/branches/branch-0.10/contrib/zebra/build.xml
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.10/contrib/zebra/build.xml?rev=1245145&r1=1245144&r2=1245145&view=diff
==============================================================================
--- pig/branches/branch-0.10/contrib/zebra/build.xml (original)
+++ pig/branches/branch-0.10/contrib/zebra/build.xml Thu Feb 16 19:59:03 2012
@@ -151,10 +151,14 @@ to call at top-level: ant deploy-contrib
         classpath to use build/contrib/*.jar
       -->
       <sysproperty key="user.dir" value="${test.build.dir}/data"/>
-      
-      <sysproperty key="fs.default.name" value="${fs.default.name}"/>
-      <sysproperty key="pig.test.localoutputfile" value="${pig.test.localoutputfile}"/>
-      <sysproperty key="pig.log.dir" value="${pig.log.dir}"/> 
+      <syspropertyset>	
+    	<propertyset>
+    	  <propertyref name="fs.default.name"/>
+      	  <propertyref name="pig.test.localoutputfile"/>
+          <propertyref name="pig.log.dir"/>
+    	</propertyset>
+      </syspropertyset>	
+
       <classpath refid="test.classpath"/>
       <formatter type="${test.junit.output.format}" />
 

Modified: pig/branches/branch-0.10/src/org/apache/pig/backend/hadoop/executionengine/HExecutionEngine.java
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.10/src/org/apache/pig/backend/hadoop/executionengine/HExecutionEngine.java?rev=1245145&r1=1245144&r2=1245145&view=diff
==============================================================================
--- pig/branches/branch-0.10/src/org/apache/pig/backend/hadoop/executionengine/HExecutionEngine.java (original)
+++ pig/branches/branch-0.10/src/org/apache/pig/backend/hadoop/executionengine/HExecutionEngine.java Thu Feb 16 19:59:03 2012
@@ -39,7 +39,6 @@ import org.apache.pig.ExecType;
 import org.apache.pig.PigException;
 import org.apache.pig.backend.datastorage.DataStorage;
 import org.apache.pig.backend.executionengine.ExecException;
-import org.apache.pig.backend.hadoop.datastorage.ConfigurationUtil;
 import org.apache.pig.backend.hadoop.datastorage.HDataStorage;
 import org.apache.pig.backend.hadoop.executionengine.physicalLayer.PhysicalOperator;
 import org.apache.pig.backend.hadoop.executionengine.physicalLayer.plans.PhysicalPlan;
@@ -120,13 +119,12 @@ public class HExecutionEngine {
     }
     
     @SuppressWarnings("deprecation")
-    public void init(Properties properties) throws ExecException {
+    private void init(Properties properties) throws ExecException {
         //First set the ssh socket factory
         setSSHFactory();
         
         String cluster = null;
         String nameNode = null;
-        Configuration configuration = null;
     
         // We need to build a configuration object first in the manner described below
         // and then get back a properties object to inspect the JOB_TRACKER_LOCATION
@@ -152,7 +150,7 @@ public class HExecutionEngine {
             
             if( hadoop_site == null && core_site == null ) {
                 throw new ExecException("Cannot find hadoop configurations in classpath (neither hadoop-site.xml nor core-site.xml was found in the classpath)." +
-                        "If you plan to use local mode, please put -x local option in command line", 
+                        " If you plan to use local mode, please put -x local option in command line", 
                         4010);
             }
 
@@ -203,18 +201,15 @@ public class HExecutionEngine {
         }
      
         log.info("Connecting to hadoop file system at: "  + (nameNode==null? LOCAL: nameNode) )  ;
+        // The HDataStorage constructor below sets DEFAULT_REPLICATION_FACTOR_KEY in properties
         ds = new HDataStorage(properties);
                 
-        // The above HDataStorage constructor sets DEFAULT_REPLICATION_FACTOR_KEY in properties.
-        configuration = ConfigurationUtil.toConfiguration(properties);
-        
-            
         if(cluster != null && !cluster.equalsIgnoreCase(LOCAL)){
-            log.info("Connecting to map-reduce job tracker at: " + properties.get(JOB_TRACKER_LOCATION));
+            log.info("Connecting to map-reduce job tracker at: " + jc.get(JOB_TRACKER_LOCATION));
         }
 
         // Set job-specific configuration knobs
-        jobConf = new JobConf(configuration);
+        jobConf = jc;
     }
 
     public void updateConfiguration(Properties newConfiguration) 
@@ -352,8 +347,8 @@ public class HExecutionEngine {
     }
 
     /**
-     * Method to recompute pig properties by overriding hadoop properties
-     * with pig properties
+     * Method to apply pig properties to JobConf
+     * (replaces properties with resulting jobConf values)
      * @param conf JobConf with appropriate hadoop resource files
      * @param properties Pig properties that will override hadoop properties; properties might be modified
      */
@@ -362,32 +357,23 @@ public class HExecutionEngine {
         // We need to load the properties from the hadoop configuration
         // We want to override these with any existing properties we have.
         if (jobConf != null && properties != null) {
-            Properties hadoopProperties = new Properties();
-            Iterator<Map.Entry<String, String>> iter = jobConf.iterator();
-            while (iter.hasNext()) {
-                Map.Entry<String, String> entry = iter.next();
-                hadoopProperties.put(entry.getKey(), entry.getValue());
-            }
-
-            //override hadoop properties with user defined properties
+            // set user properties on the jobConf to ensure that defaults
+            // and deprecation are applied correctly
             Enumeration<Object> propertiesIter = properties.keys();
             while (propertiesIter.hasMoreElements()) {
                 String key = (String) propertiesIter.nextElement();
                 String val = properties.getProperty(key);
-
                 // We do not put user.name, See PIG-1419
                 if (!key.equals("user.name"))
-                    hadoopProperties.put(key, val);
+                	jobConf.set(key, val);
             }
-            
             //clear user defined properties and re-populate
             properties.clear();
-            Enumeration<Object> hodPropertiesIter = hadoopProperties.keys();
-            while (hodPropertiesIter.hasMoreElements()) {
-                String key = (String) hodPropertiesIter.nextElement();
-                String val = hadoopProperties.getProperty(key);
-                properties.put(key, val);
-            }
+            Iterator<Map.Entry<String, String>> iter = jobConf.iterator();
+            while (iter.hasNext()) {
+                Map.Entry<String, String> entry = iter.next();
+                properties.put(entry.getKey(), entry.getValue());
+            } 
         }
     } 
     

Modified: pig/branches/branch-0.10/src/org/apache/pig/tools/grunt/GruntParser.java
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.10/src/org/apache/pig/tools/grunt/GruntParser.java?rev=1245145&r1=1245144&r2=1245145&view=diff
==============================================================================
--- pig/branches/branch-0.10/src/org/apache/pig/tools/grunt/GruntParser.java (original)
+++ pig/branches/branch-0.10/src/org/apache/pig/tools/grunt/GruntParser.java Thu Feb 16 19:59:03 2012
@@ -34,6 +34,7 @@ import java.io.StringWriter;
 import java.util.AbstractList;
 import java.util.Arrays;
 import java.util.Date;
+import java.util.Enumeration;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
@@ -44,6 +45,7 @@ import jline.ConsoleReaderInputStream;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FsShell;
 import org.apache.hadoop.mapred.JobClient;
 import org.apache.hadoop.mapred.JobConf;
@@ -55,11 +57,10 @@ import org.apache.pig.backend.datastorag
 import org.apache.pig.backend.datastorage.DataStorage;
 import org.apache.pig.backend.datastorage.DataStorageException;
 import org.apache.pig.backend.datastorage.ElementDescriptor;
-import org.apache.pig.backend.executionengine.ExecJob;
+import org.apache.pig.backend.executionengine.ExecException;
 import org.apache.pig.backend.hadoop.datastorage.ConfigurationUtil;
 import org.apache.pig.backend.hadoop.datastorage.HDataStorage;
 import org.apache.pig.backend.hadoop.executionengine.HExecutionEngine;
-import org.apache.pig.backend.executionengine.ExecException;
 import org.apache.pig.data.Tuple;
 import org.apache.pig.impl.io.FileLocalizer;
 import org.apache.pig.impl.io.FileLocalizer.FetchFileRet;
@@ -559,7 +560,33 @@ public class GruntParser extends PigScri
         }
         else
         {
-        	mPigServer.getPigContext().getProperties().setProperty(key, value);
+            //mPigServer.getPigContext().getProperties().setProperty(key, value);
+            // PIG-2508: properties need to be managed through JobConf.
+            // Since all other code depends on access to properties,
+            // we need to re-populate them from the updated JobConf.
+            //java.util.HashSet<?> keysBefore = new java.util.HashSet<Object>(mPigServer.getPigContext().getProperties().keySet());        	
+            // set current properties on jobConf
+            Properties properties = mPigServer.getPigContext().getProperties();
+            Configuration jobConf = mPigServer.getPigContext().getExecutionEngine().getJobConf();
+            Enumeration<Object> propertiesIter = properties.keys();
+            while (propertiesIter.hasMoreElements()) {
+                String pkey = (String) propertiesIter.nextElement();
+                String val = properties.getProperty(pkey);
+                // We do not put user.name, See PIG-1419
+                if (!pkey.equals("user.name"))
+                   jobConf.set(pkey, val);
+            }
+            // set new value, JobConf will handle deprecation etc.
+            jobConf.set(key, value);
+            // re-initialize to reflect updated JobConf
+            properties.clear();
+            Iterator<Map.Entry<String, String>> iter = jobConf.iterator();
+            while (iter.hasNext()) {
+                Map.Entry<String, String> entry = iter.next();
+                properties.put(entry.getKey(), entry.getValue());
+            } 
+            //keysBefore.removeAll(mPigServer.getPigContext().getProperties().keySet());
+            //log.info("PIG-2508: keys dropped from properties: " + keysBefore);
         }
     }