You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tez.apache.org by bi...@apache.org on 2013/11/12 00:37:06 UTC

git commit: TEZ-609. YARN may report 0 capacity for job and grouping needs to take care of that. (bikas)

Updated Branches:
  refs/heads/master 70c7e16d6 -> 18540ca92


TEZ-609. YARN may report 0 capacity for job and grouping needs to take care of that. (bikas)


Project: http://git-wip-us.apache.org/repos/asf/incubator-tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-tez/commit/18540ca9
Tree: http://git-wip-us.apache.org/repos/asf/incubator-tez/tree/18540ca9
Diff: http://git-wip-us.apache.org/repos/asf/incubator-tez/diff/18540ca9

Branch: refs/heads/master
Commit: 18540ca924d9ac9309f601aea6cda19616f699db
Parents: 70c7e16
Author: Bikas Saha <bi...@apache.org>
Authored: Mon Nov 11 15:32:35 2013 -0800
Committer: Bikas Saha <bi...@apache.org>
Committed: Mon Nov 11 15:32:57 2013 -0800

----------------------------------------------------------------------
 .../hadoop/mapred/split/TezGroupedSplitsInputFormat.java    | 6 +++---
 .../hadoop/mapreduce/split/TezGroupedSplitsInputFormat.java | 9 ++++-----
 .../org/apache/hadoop/mapred/split/TestGroupedSplits.java   | 5 +++--
 3 files changed, 10 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-tez/blob/18540ca9/tez-mapreduce/src/main/java/org/apache/hadoop/mapred/split/TezGroupedSplitsInputFormat.java
----------------------------------------------------------------------
diff --git a/tez-mapreduce/src/main/java/org/apache/hadoop/mapred/split/TezGroupedSplitsInputFormat.java b/tez-mapreduce/src/main/java/org/apache/hadoop/mapred/split/TezGroupedSplitsInputFormat.java
index 585fbba..f523c66 100644
--- a/tez-mapreduce/src/main/java/org/apache/hadoop/mapred/split/TezGroupedSplitsInputFormat.java
+++ b/tez-mapreduce/src/main/java/org/apache/hadoop/mapred/split/TezGroupedSplitsInputFormat.java
@@ -120,7 +120,6 @@ public class TezGroupedSplitsInputFormat<K, V>
     InputSplit[] originalSplits = wrappedInputFormat.getSplits(job, numSplits);
     
     if (! (configNumSplits > 0 || 
-          desiredNumSplits == 0 ||
           originalSplits == null || 
           originalSplits.length == 0) ) {
       // numSplits has not been overridden by config
@@ -131,8 +130,9 @@ public class TezGroupedSplitsInputFormat<K, V>
       for (InputSplit split : originalSplits) {
         totalLength += split.getLength();
       }
-  
-      long lengthPerGroup = totalLength/desiredNumSplits;
+
+      int splitCount = desiredNumSplits>0?desiredNumSplits:originalSplits.length;
+      long lengthPerGroup = totalLength/splitCount;
       
       long maxLengthPerGroup = job.getLong(
           TezConfiguration.TEZ_AM_GROUPING_SPLIT_MAX_SIZE,

http://git-wip-us.apache.org/repos/asf/incubator-tez/blob/18540ca9/tez-mapreduce/src/main/java/org/apache/hadoop/mapreduce/split/TezGroupedSplitsInputFormat.java
----------------------------------------------------------------------
diff --git a/tez-mapreduce/src/main/java/org/apache/hadoop/mapreduce/split/TezGroupedSplitsInputFormat.java b/tez-mapreduce/src/main/java/org/apache/hadoop/mapreduce/split/TezGroupedSplitsInputFormat.java
index b815c8a..0ff250a 100644
--- a/tez-mapreduce/src/main/java/org/apache/hadoop/mapreduce/split/TezGroupedSplitsInputFormat.java
+++ b/tez-mapreduce/src/main/java/org/apache/hadoop/mapreduce/split/TezGroupedSplitsInputFormat.java
@@ -115,10 +115,8 @@ public class TezGroupedSplitsInputFormat<K, V> extends InputFormat<K, V>
     List<InputSplit> originalSplits = wrappedInputFormat.getSplits(context);
     
     if (! (configNumSplits > 0 || 
-          desiredNumSplits == 0 ||
           originalSplits == null || 
-          originalSplits.size() == 0) ||
-          desiredNumSplits >= originalSplits.size()) {
+          originalSplits.size() == 0)) {
       // numSplits has not been overridden by config
       // numSplits has been set at runtime
       // there are splits generated
@@ -129,8 +127,9 @@ public class TezGroupedSplitsInputFormat<K, V> extends InputFormat<K, V>
         totalLength += split.getLength();
       }
   
-      long lengthPerGroup = totalLength/desiredNumSplits;
-      
+      int splitCount = desiredNumSplits>0?desiredNumSplits:originalSplits.size();
+      long lengthPerGroup = totalLength/splitCount;
+
       long maxLengthPerGroup = conf.getLong(
           TezConfiguration.TEZ_AM_GROUPING_SPLIT_MAX_SIZE,
           TezConfiguration.TEZ_AM_GROUPING_SPLIT_MAX_SIZE_DEFAULT);

http://git-wip-us.apache.org/repos/asf/incubator-tez/blob/18540ca9/tez-mapreduce/src/test/java/org/apache/hadoop/mapred/split/TestGroupedSplits.java
----------------------------------------------------------------------
diff --git a/tez-mapreduce/src/test/java/org/apache/hadoop/mapred/split/TestGroupedSplits.java b/tez-mapreduce/src/test/java/org/apache/hadoop/mapred/split/TestGroupedSplits.java
index 0f9d078..b7d09e1 100644
--- a/tez-mapreduce/src/test/java/org/apache/hadoop/mapred/split/TestGroupedSplits.java
+++ b/tez-mapreduce/src/test/java/org/apache/hadoop/mapred/split/TestGroupedSplits.java
@@ -306,9 +306,10 @@ public class TestGroupedSplits {
     }
     when(mockWrappedFormat.getSplits((JobConf)anyObject(), anyInt())).thenReturn(mockSplits);
     
-    // desired splits not set. return original
+    // Desired splits not set: grouping falls back to the original split count
+    // to derive the per-group length. In this case the min split size is hit.
     InputSplit[] splits = format.getSplits(job, 0);
-    Assert.assertEquals(numSplits, splits.length);
+    Assert.assertEquals(25, splits.length);
     
     // split too big. override with max
     format.setDesiredNumberOfSplits(1);