You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by ro...@apache.org on 2014/03/08 09:41:03 UTC

svn commit: r1575499 - in /pig/branches/tez: ./ ivy/ shims/test/hadoop23/org/apache/pig/test/ src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/ src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/ src/org/apache/pig/backend/...

Author: rohini
Date: Sat Mar  8 08:41:03 2014
New Revision: 1575499

URL: http://svn.apache.org/r1575499
Log:
PIG-3797: Fix some memory leaks affecting container reuse (rohini)

Modified:
    pig/branches/tez/ivy.xml
    pig/branches/tez/ivy/libraries.properties
    pig/branches/tez/shims/test/hadoop23/org/apache/pig/test/TezMiniCluster.java
    pig/branches/tez/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigCombiner.java
    pig/branches/tez/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigGenericMapReduce.java
    pig/branches/tez/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/PhysicalOperator.java
    pig/branches/tez/src/org/apache/pig/backend/hadoop/executionengine/tez/PigProcessor.java
    pig/branches/tez/src/org/apache/pig/backend/hadoop/executionengine/tez/TezDagBuilder.java
    pig/branches/tez/src/org/apache/pig/backend/hadoop/executionengine/tez/TezLauncher.java
    pig/branches/tez/test/tez-tests

Modified: pig/branches/tez/ivy.xml
URL: http://svn.apache.org/viewvc/pig/branches/tez/ivy.xml?rev=1575499&r1=1575498&r2=1575499&view=diff
==============================================================================
--- pig/branches/tez/ivy.xml (original)
+++ pig/branches/tez/ivy.xml Sat Mar  8 08:41:03 2014
@@ -415,8 +415,6 @@
        conf="hadoop23->master"/>
     <dependency org="org.apache.tez" name="tez-dag" rev="${tez.version}"
        conf="hadoop23->master"/>
-    <dependency org="org.apache.tez" name="tez-dist" rev="${tez.version}"
-       conf="hadoop23->master"/>
     <dependency org="org.apache.tez" name="tez-runtime-internals" rev="${tez.version}"
        conf="hadoop23->master"/>
     <dependency org="org.apache.tez" name="tez-runtime-library" rev="${tez.version}"

Modified: pig/branches/tez/ivy/libraries.properties
URL: http://svn.apache.org/viewvc/pig/branches/tez/ivy/libraries.properties?rev=1575499&r1=1575498&r2=1575499&view=diff
==============================================================================
--- pig/branches/tez/ivy/libraries.properties (original)
+++ pig/branches/tez/ivy/libraries.properties Sat Mar  8 08:41:03 2014
@@ -91,5 +91,5 @@ mockito.version=1.8.4
 jansi.version=1.9
 asm.version=3.3.1
 snappy.version=1.0.5-M3
-tez.version=0.3.0-incubating-SNAPSHOT
+tez.version=0.3.0-incubating
 parquet-pig-bundle.version=1.2.3

Modified: pig/branches/tez/shims/test/hadoop23/org/apache/pig/test/TezMiniCluster.java
URL: http://svn.apache.org/viewvc/pig/branches/tez/shims/test/hadoop23/org/apache/pig/test/TezMiniCluster.java?rev=1575499&r1=1575498&r2=1575499&view=diff
==============================================================================
--- pig/branches/tez/shims/test/hadoop23/org/apache/pig/test/TezMiniCluster.java (original)
+++ pig/branches/tez/shims/test/hadoop23/org/apache/pig/test/TezMiniCluster.java Sat Mar  8 08:41:03 2014
@@ -27,7 +27,6 @@ import org.apache.hadoop.conf.Configurat
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.MRJobConfig;
 import org.apache.hadoop.mapreduce.v2.MiniMRYarnCluster;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
 import org.apache.pig.ExecType;
@@ -90,7 +89,7 @@ public class TezMiniCluster extends Mini
             m_mr_conf.set(YarnConfiguration.YARN_APPLICATION_CLASSPATH,
                     System.getProperty("java.class.path"));
             // TODO PIG-3659 - Remove this once memory management is fixed
-            m_mr_conf.set(MRJobConfig.MAP_JAVA_OPTS, "-Xmx384M");// -Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=8005 -Xnoagent -Djava.compiler=NONE");
+            // m_mr_conf.set(MRJobConfig.MAP_JAVA_OPTS, "-Xmx384M");// -Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=8005 -Xnoagent -Djava.compiler=NONE");
             m_mr_conf.writeXml(new FileOutputStream(MAPRED_CONF_FILE));
             m_fileSys.copyFromLocalFile(
                     new Path(MAPRED_CONF_FILE.getAbsoluteFile().toString()),

Modified: pig/branches/tez/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigCombiner.java
URL: http://svn.apache.org/viewvc/pig/branches/tez/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigCombiner.java?rev=1575499&r1=1575498&r2=1575499&view=diff
==============================================================================
--- pig/branches/tez/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigCombiner.java (original)
+++ pig/branches/tez/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigCombiner.java Sat Mar  8 08:41:03 2014
@@ -25,7 +25,6 @@ import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.mapreduce.JobContext;
 import org.apache.hadoop.mapreduce.Reducer;
 import org.apache.log4j.PropertyConfigurator;
 import org.apache.pig.PigException;
@@ -46,8 +45,6 @@ import org.apache.pig.tools.pigstats.Pig
 
 public class PigCombiner {
 
-    public static JobContext sJobContext = null;
-
     public static class Combine
             extends Reducer<PigNullableWritable, NullableTuple, PigNullableWritable, Writable> {
 
@@ -84,7 +81,6 @@ public class PigCombiner {
         @Override
         protected void setup(Context context) throws IOException, InterruptedException {
             super.setup(context);
-            sJobContext = context;
             Configuration jConf = context.getConfiguration();
             try {
                 PigContext.setPackageImportList((ArrayList<String>)ObjectSerializer.deserialize(jConf.get("udf.import.list")));

Modified: pig/branches/tez/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigGenericMapReduce.java
URL: http://svn.apache.org/viewvc/pig/branches/tez/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigGenericMapReduce.java?rev=1575499&r1=1575498&r2=1575499&view=diff
==============================================================================
--- pig/branches/tez/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigGenericMapReduce.java (original)
+++ pig/branches/tez/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigGenericMapReduce.java Sat Mar  8 08:41:03 2014
@@ -98,7 +98,7 @@ public class PigGenericMapReduce {
     @Deprecated
     public static Configuration sJobConf = null;
 
-    public static final ThreadLocal<Configuration> sJobConfInternal = new ThreadLocal<Configuration>();
+    public static ThreadLocal<Configuration> sJobConfInternal = new ThreadLocal<Configuration>();
 
     public static class Map extends PigMapBase {
 

Modified: pig/branches/tez/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/PhysicalOperator.java
URL: http://svn.apache.org/viewvc/pig/branches/tez/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/PhysicalOperator.java?rev=1575499&r1=1575498&r2=1575499&view=diff
==============================================================================
--- pig/branches/tez/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/PhysicalOperator.java (original)
+++ pig/branches/tez/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/PhysicalOperator.java Sat Mar  8 08:41:03 2014
@@ -100,7 +100,7 @@ public abstract class PhysicalOperator e
     // Will be used by operators to report status or transmit heartbeat
     // Should be set by the backends to appropriate implementations that
     // wrap their own version of a reporter.
-    private static ThreadLocal<PigProgressable> reporter = new ThreadLocal<PigProgressable>();
+    public static ThreadLocal<PigProgressable> reporter = new ThreadLocal<PigProgressable>();
 
     // Will be used by operators to aggregate warning messages
     // Should be set by the backends to appropriate implementations that

Modified: pig/branches/tez/src/org/apache/pig/backend/hadoop/executionengine/tez/PigProcessor.java
URL: http://svn.apache.org/viewvc/pig/branches/tez/src/org/apache/pig/backend/hadoop/executionengine/tez/PigProcessor.java?rev=1575499&r1=1575498&r2=1575499&view=diff
==============================================================================
--- pig/branches/tez/src/org/apache/pig/backend/hadoop/executionengine/tez/PigProcessor.java (original)
+++ pig/branches/tez/src/org/apache/pig/backend/hadoop/executionengine/tez/PigProcessor.java Sat Mar  8 08:41:03 2014
@@ -34,6 +34,7 @@ import org.apache.pig.backend.hadoop.exe
 import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigMapReduce;
 import org.apache.pig.backend.hadoop.executionengine.physicalLayer.POStatus;
 import org.apache.pig.backend.hadoop.executionengine.physicalLayer.PhysicalOperator;
+import org.apache.pig.backend.hadoop.executionengine.physicalLayer.PigProgressable;
 import org.apache.pig.backend.hadoop.executionengine.physicalLayer.Result;
 import org.apache.pig.backend.hadoop.executionengine.physicalLayer.expressionOperators.POUserFunc;
 import org.apache.pig.backend.hadoop.executionengine.physicalLayer.plans.PhysicalPlan;
@@ -74,10 +75,16 @@ public class PigProcessor implements Log
     @Override
     public void initialize(TezProcessorContext processorContext)
             throws Exception {
-        // Reset any static variables to avoid conflic in container-reuse.
+        // Reset any static variables to avoid conflict in container-reuse.
         sampleVertex = null;
         sampleMap = null;
 
+        // Reset the static variables that are cleared in order to avoid OOM.
+        // TODO: Figure out a cleaner way to do this. ThreadLocals can actually be avoided altogether
+        // for mapreduce/tez mode and just used for Local mode.
+        PhysicalOperator.reporter = new ThreadLocal<PigProgressable>();
+        PigMapReduce.sJobConfInternal = new ThreadLocal<Configuration>();
+
         byte[] payload = processorContext.getUserPayload();
         conf = TezUtils.createConfFromUserPayload(payload);
         PigContext pc = (PigContext) ObjectSerializer.deserialize(conf.get("pig.pigContext"));
@@ -103,8 +110,16 @@ public class PigProcessor implements Log
 
     @Override
     public void close() throws Exception {
-        // TODO Auto-generated method stub
-
+        // Avoid memory leak. ThreadLocals especially leak a lot of memory.
+        PhysicalOperator.reporter = new ThreadLocal<PigProgressable>();
+        PigMapReduce.sJobConfInternal = new ThreadLocal<Configuration>();
+        PigMapReduce.sJobContext = null;
+        execPlan = null;
+        fileOutputs = null;
+        leaf = null;
+        conf = null;
+        sampleMap = null;
+        sampleVertex = null;
     }
 
     @SuppressWarnings("rawtypes")

Modified: pig/branches/tez/src/org/apache/pig/backend/hadoop/executionengine/tez/TezDagBuilder.java
URL: http://svn.apache.org/viewvc/pig/branches/tez/src/org/apache/pig/backend/hadoop/executionengine/tez/TezDagBuilder.java?rev=1575499&r1=1575498&r2=1575499&view=diff
==============================================================================
--- pig/branches/tez/src/org/apache/pig/backend/hadoop/executionengine/tez/TezDagBuilder.java (original)
+++ pig/branches/tez/src/org/apache/pig/backend/hadoop/executionengine/tez/TezDagBuilder.java Sat Mar  8 08:41:03 2014
@@ -257,6 +257,9 @@ public class TezDagBuilder extends TezOp
                     edge.partitionerClass.getName());
         }
 
+        conf.set("udf.import.list",
+                ObjectSerializer.serialize(PigContext.getPackageImportList()));
+
         MRToTezHelper.convertMRToTezRuntimeConf(conf, globalConf);
 
         in.setUserPayload(TezUtils.createUserPayloadFromConf(conf));

Modified: pig/branches/tez/src/org/apache/pig/backend/hadoop/executionengine/tez/TezLauncher.java
URL: http://svn.apache.org/viewvc/pig/branches/tez/src/org/apache/pig/backend/hadoop/executionengine/tez/TezLauncher.java?rev=1575499&r1=1575498&r2=1575499&view=diff
==============================================================================
--- pig/branches/tez/src/org/apache/pig/backend/hadoop/executionengine/tez/TezLauncher.java (original)
+++ pig/branches/tez/src/org/apache/pig/backend/hadoop/executionengine/tez/TezLauncher.java Sat Mar  8 08:41:03 2014
@@ -59,6 +59,7 @@ public class TezLauncher extends Launche
 
         TezResourceManager tezResourceManager = new TezResourceManager(stagingDir, pc, conf);
 
+        log.info("Tez staging directory is " + stagingDir.toString());
         conf.set(TezConfiguration.TEZ_AM_STAGING_DIR, stagingDir.toString());
 
         List<TezOperPlan> processedPlans = new ArrayList<TezOperPlan>();

Modified: pig/branches/tez/test/tez-tests
URL: http://svn.apache.org/viewvc/pig/branches/tez/test/tez-tests?rev=1575499&r1=1575498&r2=1575499&view=diff
==============================================================================
--- pig/branches/tez/test/tez-tests (original)
+++ pig/branches/tez/test/tez-tests Sat Mar  8 08:41:03 2014
@@ -18,5 +18,4 @@
 **/TestBZip.java
 **/TestCompressedFiles.java
 **/TestCharArrayToNumeric.java
-## TODO: Runs fine individually. Hangs with file.out.index not found when run together. Likely Tez Bug
-##**/TestSecondarySortTez.java
+**/TestSecondarySortTez.java