You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by rk...@apache.org on 2015/04/02 01:41:34 UTC

hadoop git commit: Zero map split input length combined with non-zero map split input length will cause an MR1 job to hang. (zxu via rkanter)

Repository: hadoop
Updated Branches:
  refs/heads/branch-1 8151679f8 -> 5f5138e5b


Zero map split input length combined with non-zero map split input length will cause an MR1 job to hang. (zxu via rkanter)


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/5f5138e5
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/5f5138e5
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/5f5138e5

Branch: refs/heads/branch-1
Commit: 5f5138e5b37c570272ceadfa9020f1896223a04f
Parents: 8151679
Author: Robert Kanter <rk...@apache.org>
Authored: Wed Apr 1 15:19:59 2015 -0700
Committer: Robert Kanter <rk...@apache.org>
Committed: Wed Apr 1 15:19:59 2015 -0700

----------------------------------------------------------------------
 CHANGES.txt                                       |  3 +++
 .../org/apache/hadoop/mapred/JobInProgress.java   |  6 +++++-
 .../apache/hadoop/mapred/ResourceEstimator.java   | 12 ++++++++++--
 .../hadoop/mapred/TestResourceEstimation.java     | 18 ++++++++++--------
 4 files changed, 28 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/5f5138e5/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 57f1cf4..93a0a6b 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -282,6 +282,9 @@ Release 1.3.0 - unreleased
 
     HDFS-6649. Documentation for setrep is wrong. (aajisaka)
 
+    Zero map split input length combine with none zero map split input
+    length will cause MR1 job hung. (zxu via rkanter)
+
 Release 1.2.2 - unreleased
 
   INCOMPATIBLE CHANGES

http://git-wip-us.apache.org/repos/asf/hadoop/blob/5f5138e5/src/mapred/org/apache/hadoop/mapred/JobInProgress.java
----------------------------------------------------------------------
diff --git a/src/mapred/org/apache/hadoop/mapred/JobInProgress.java b/src/mapred/org/apache/hadoop/mapred/JobInProgress.java
index 0861584..f134553 100644
--- a/src/mapred/org/apache/hadoop/mapred/JobInProgress.java
+++ b/src/mapred/org/apache/hadoop/mapred/JobInProgress.java
@@ -938,7 +938,11 @@ public class JobInProgress {
   long getInputLength() {
     return inputLength;
   }
- 
+
+  void setInputLength(long length) {
+    inputLength = length;
+  }
+
   boolean isCleanupLaunched() {
     return launchedCleanup;
   }

http://git-wip-us.apache.org/repos/asf/hadoop/blob/5f5138e5/src/mapred/org/apache/hadoop/mapred/ResourceEstimator.java
----------------------------------------------------------------------
diff --git a/src/mapred/org/apache/hadoop/mapred/ResourceEstimator.java b/src/mapred/org/apache/hadoop/mapred/ResourceEstimator.java
index e2f8fc6..ec0e64d 100644
--- a/src/mapred/org/apache/hadoop/mapred/ResourceEstimator.java
+++ b/src/mapred/org/apache/hadoop/mapred/ResourceEstimator.java
@@ -52,8 +52,16 @@ class ResourceEstimator {
     //-1 indicates error, which we don't average in.
     if(tip.isMapTask() &&  ts.getOutputSize() != -1)  {
       completedMapsUpdates++;
-
-      completedMapsInputSize+=(tip.getMapInputSize()+1);
+      long inputSize = tip.getMapInputSize();
+      if (inputSize == 0) {
+        // if map input size is 0, use map output size as input size
+        // to avoid job hung.
+        inputSize = ts.getOutputSize();
+        // map input size is changed, update JobInProgress.inputLength.
+        long length = job.getInputLength() + inputSize;
+        job.setInputLength(length);
+      }
+      completedMapsInputSize+=(inputSize+1);
       completedMapsOutputSize+=ts.getOutputSize();
 
       if(LOG.isDebugEnabled()) {

http://git-wip-us.apache.org/repos/asf/hadoop/blob/5f5138e5/src/test/org/apache/hadoop/mapred/TestResourceEstimation.java
----------------------------------------------------------------------
diff --git a/src/test/org/apache/hadoop/mapred/TestResourceEstimation.java b/src/test/org/apache/hadoop/mapred/TestResourceEstimation.java
index 6a16b72..fed722f 100644
--- a/src/test/org/apache/hadoop/mapred/TestResourceEstimation.java
+++ b/src/test/org/apache/hadoop/mapred/TestResourceEstimation.java
@@ -55,12 +55,13 @@ public class TestResourceEstimation {
     //unfortunately, we can't set job input size from here.
     ResourceEstimator re = new ResourceEstimator(jip);
     
-    for(int i = 0; i < maps / 10 ; ++i) {
-
-      long estOutSize = re.getEstimatedMapOutputSize();
-      System.out.println(estOutSize);
-      assertEquals(0, estOutSize);
-      
+    for(int i = 0; i < maps; ++i) {
+      if (i < maps / 10) {
+        // re.thresholdToUse is maps / 10
+        long estOutSize = re.getEstimatedMapOutputSize();
+        System.out.println(estOutSize);
+        assertEquals(0, estOutSize);
+      }
       TaskStatus ts = new MapTaskStatus();
       ts.setOutputSize(singleMapOutputSize);
       JobSplit.TaskSplitMetaInfo split =
@@ -120,9 +121,10 @@ public class TestResourceEstimation {
     TaskInProgress tip = 
       new TaskInProgress(jid, "", split, jip.jobtracker, jc, jip, 0, 1);
     re.updateWithCompletedTask(ts, tip);
-    
+    // for 0 input size, use output size as input size for calculation
     long expectedTotalMapOutSize = (singleMapOutputSize*11) * 
-      ((maps*singleMapInputSize)+maps)/((singleMapInputSize+1)*10+1);
+      ((maps*singleMapInputSize)+maps)/((singleMapInputSize+1)*10+
+      singleMapOutputSize+1);
     assertEquals(2* expectedTotalMapOutSize/maps, re.getEstimatedMapOutputSize());
   }