You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by da...@apache.org on 2012/02/10 07:00:59 UTC
svn commit: r1242691 - in /pig/branches/branch-0.9: CHANGES.txt
contrib/zebra/build.xml
src/org/apache/pig/backend/hadoop/executionengine/HExecutionEngine.java
src/org/apache/pig/tools/grunt/GruntParser.java
Author: daijy
Date: Fri Feb 10 06:00:58 2012
New Revision: 1242691
URL: http://svn.apache.org/viewvc?rev=1242691&view=rev
Log:
PIG-2508: PIG can unpredictably ignore deprecated Hadoop config options
Modified:
pig/branches/branch-0.9/CHANGES.txt
pig/branches/branch-0.9/contrib/zebra/build.xml
pig/branches/branch-0.9/src/org/apache/pig/backend/hadoop/executionengine/HExecutionEngine.java
pig/branches/branch-0.9/src/org/apache/pig/tools/grunt/GruntParser.java
Modified: pig/branches/branch-0.9/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.9/CHANGES.txt?rev=1242691&r1=1242690&r2=1242691&view=diff
==============================================================================
--- pig/branches/branch-0.9/CHANGES.txt (original)
+++ pig/branches/branch-0.9/CHANGES.txt Fri Feb 10 06:00:58 2012
@@ -26,6 +26,8 @@ PIG-2493: UNION causes casting issues (v
PIG-2497: Order of execution of fs, store and sh commands in Pig is not maintained (daijy)
+PIG-2508: PIG can unpredictably ignore deprecated Hadoop config options (thw via daijy)
+
Release 0.9.2
IMPROVEMENTS
Modified: pig/branches/branch-0.9/contrib/zebra/build.xml
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.9/contrib/zebra/build.xml?rev=1242691&r1=1242690&r2=1242691&view=diff
==============================================================================
--- pig/branches/branch-0.9/contrib/zebra/build.xml (original)
+++ pig/branches/branch-0.9/contrib/zebra/build.xml Fri Feb 10 06:00:58 2012
@@ -151,10 +151,14 @@ to call at top-level: ant deploy-contrib
classpath to use build/contrib/*.jar
-->
<sysproperty key="user.dir" value="${test.build.dir}/data"/>
-
- <sysproperty key="fs.default.name" value="${fs.default.name}"/>
- <sysproperty key="pig.test.localoutputfile" value="${pig.test.localoutputfile}"/>
- <sysproperty key="pig.log.dir" value="${pig.log.dir}"/>
+ <syspropertyset>
+ <propertyset>
+ <propertyref name="fs.default.name"/>
+ <propertyref name="pig.test.localoutputfile"/>
+ <propertyref name="pig.log.dir"/>
+ </propertyset>
+ </syspropertyset>
+
<classpath refid="test.classpath"/>
<formatter type="${test.junit.output.format}" />
Modified: pig/branches/branch-0.9/src/org/apache/pig/backend/hadoop/executionengine/HExecutionEngine.java
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.9/src/org/apache/pig/backend/hadoop/executionengine/HExecutionEngine.java?rev=1242691&r1=1242690&r2=1242691&view=diff
==============================================================================
--- pig/branches/branch-0.9/src/org/apache/pig/backend/hadoop/executionengine/HExecutionEngine.java (original)
+++ pig/branches/branch-0.9/src/org/apache/pig/backend/hadoop/executionengine/HExecutionEngine.java Fri Feb 10 06:00:58 2012
@@ -39,7 +39,6 @@ import org.apache.pig.ExecType;
import org.apache.pig.PigException;
import org.apache.pig.backend.datastorage.DataStorage;
import org.apache.pig.backend.executionengine.ExecException;
-import org.apache.pig.backend.hadoop.datastorage.ConfigurationUtil;
import org.apache.pig.backend.hadoop.datastorage.HDataStorage;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.PhysicalOperator;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.plans.PhysicalPlan;
@@ -120,13 +119,12 @@ public class HExecutionEngine {
}
@SuppressWarnings("deprecation")
- public void init(Properties properties) throws ExecException {
+ private void init(Properties properties) throws ExecException {
//First set the ssh socket factory
setSSHFactory();
String cluster = null;
String nameNode = null;
- Configuration configuration = null;
// We need to build a configuration object first in the manner described below
// and then get back a properties object to inspect the JOB_TRACKER_LOCATION
@@ -152,7 +150,7 @@ public class HExecutionEngine {
if( hadoop_site == null && core_site == null ) {
throw new ExecException("Cannot find hadoop configurations in classpath (neither hadoop-site.xml nor core-site.xml was found in the classpath)." +
- "If you plan to use local mode, please put -x local option in command line",
+ " If you plan to use local mode, please put -x local option in command line",
4010);
}
@@ -203,18 +201,15 @@ public class HExecutionEngine {
}
log.info("Connecting to hadoop file system at: " + (nameNode==null? LOCAL: nameNode) ) ;
+ // constructor sets DEFAULT_REPLICATION_FACTOR_KEY
ds = new HDataStorage(properties);
- // The above HDataStorage constructor sets DEFAULT_REPLICATION_FACTOR_KEY in properties.
- configuration = ConfigurationUtil.toConfiguration(properties);
-
-
if(cluster != null && !cluster.equalsIgnoreCase(LOCAL)){
- log.info("Connecting to map-reduce job tracker at: " + properties.get(JOB_TRACKER_LOCATION));
+ log.info("Connecting to map-reduce job tracker at: " + jc.get(JOB_TRACKER_LOCATION));
}
// Set job-specific configuration knobs
- jobConf = new JobConf(configuration);
+ jobConf = jc;
}
public void updateConfiguration(Properties newConfiguration)
@@ -352,8 +347,8 @@ public class HExecutionEngine {
}
/**
- * Method to recompute pig properties by overriding hadoop properties
- * with pig properties
+ * Method to apply pig properties to JobConf
+ * (replaces properties with resulting jobConf values)
* @param conf JobConf with appropriate hadoop resource files
* @param properties Pig properties that will override hadoop properties; properties might be modified
*/
@@ -362,32 +357,23 @@ public class HExecutionEngine {
// We need to load the properties from the hadoop configuration
// We want to override these with any existing properties we have.
if (jobConf != null && properties != null) {
- Properties hadoopProperties = new Properties();
- Iterator<Map.Entry<String, String>> iter = jobConf.iterator();
- while (iter.hasNext()) {
- Map.Entry<String, String> entry = iter.next();
- hadoopProperties.put(entry.getKey(), entry.getValue());
- }
-
- //override hadoop properties with user defined properties
+ // set user properties on the jobConf to ensure that defaults
+ // and deprecation are applied correctly
Enumeration<Object> propertiesIter = properties.keys();
while (propertiesIter.hasMoreElements()) {
String key = (String) propertiesIter.nextElement();
String val = properties.getProperty(key);
-
// We do not put user.name, See PIG-1419
if (!key.equals("user.name"))
- hadoopProperties.put(key, val);
+ jobConf.set(key, val);
}
-
//clear user defined properties and re-populate
properties.clear();
- Enumeration<Object> hodPropertiesIter = hadoopProperties.keys();
- while (hodPropertiesIter.hasMoreElements()) {
- String key = (String) hodPropertiesIter.nextElement();
- String val = hadoopProperties.getProperty(key);
- properties.put(key, val);
- }
+ Iterator<Map.Entry<String, String>> iter = jobConf.iterator();
+ while (iter.hasNext()) {
+ Map.Entry<String, String> entry = iter.next();
+ properties.put(entry.getKey(), entry.getValue());
+ }
}
}
Modified: pig/branches/branch-0.9/src/org/apache/pig/tools/grunt/GruntParser.java
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.9/src/org/apache/pig/tools/grunt/GruntParser.java?rev=1242691&r1=1242690&r2=1242691&view=diff
==============================================================================
--- pig/branches/branch-0.9/src/org/apache/pig/tools/grunt/GruntParser.java (original)
+++ pig/branches/branch-0.9/src/org/apache/pig/tools/grunt/GruntParser.java Fri Feb 10 06:00:58 2012
@@ -34,6 +34,7 @@ import java.io.StringWriter;
import java.util.AbstractList;
import java.util.Arrays;
import java.util.Date;
+import java.util.Enumeration;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
@@ -44,6 +45,7 @@ import jline.ConsoleReaderInputStream;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FsShell;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
@@ -55,11 +57,10 @@ import org.apache.pig.backend.datastorag
import org.apache.pig.backend.datastorage.DataStorage;
import org.apache.pig.backend.datastorage.DataStorageException;
import org.apache.pig.backend.datastorage.ElementDescriptor;
-import org.apache.pig.backend.executionengine.ExecJob;
+import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.backend.hadoop.datastorage.ConfigurationUtil;
import org.apache.pig.backend.hadoop.datastorage.HDataStorage;
import org.apache.pig.backend.hadoop.executionengine.HExecutionEngine;
-import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.data.Tuple;
import org.apache.pig.impl.io.FileLocalizer;
import org.apache.pig.impl.io.FileLocalizer.FetchFileRet;
@@ -559,7 +560,33 @@ public class GruntParser extends PigScri
}
else
{
- mPigServer.getPigContext().getProperties().setProperty(key, value);
+ //mPigServer.getPigContext().getProperties().setProperty(key, value);
+ // PIG-2508: properties need to be managed through JobConf.
+ // Since all other code depends on access to properties,
+ // we need to re-populate them from the updated JobConf.
+ //java.util.HashSet<?> keysBefore = new java.util.HashSet<Object>(mPigServer.getPigContext().getProperties().keySet());
+ // set current properties on jobConf
+ Properties properties = mPigServer.getPigContext().getProperties();
+ Configuration jobConf = mPigServer.getPigContext().getExecutionEngine().getJobConf();
+ Enumeration<Object> propertiesIter = properties.keys();
+ while (propertiesIter.hasMoreElements()) {
+ String pkey = (String) propertiesIter.nextElement();
+ String val = properties.getProperty(pkey);
+ // We do not put user.name, See PIG-1419
+ if (!pkey.equals("user.name"))
+ jobConf.set(pkey, val);
+ }
+ // set new value, JobConf will handle deprecation etc.
+ jobConf.set(key, value);
+ // re-initialize to reflect updated JobConf
+ properties.clear();
+ Iterator<Map.Entry<String, String>> iter = jobConf.iterator();
+ while (iter.hasNext()) {
+ Map.Entry<String, String> entry = iter.next();
+ properties.put(entry.getKey(), entry.getValue());
+ }
+ //keysBefore.removeAll(mPigServer.getPigContext().getProperties().keySet());
+ //log.info("PIG-2508: keys dropped from properties: " + keysBefore);
}
}