You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by ka...@apache.org on 2014/11/04 00:55:30 UTC

git commit: MAPREDUCE-6147. Support mapreduce.input.fileinputformat.split.maxsize. (Zhihai Xu via kasha)

Repository: hadoop
Updated Branches:
  refs/heads/branch-1 689ca40fc -> ebec913e2


MAPREDUCE-6147. Support mapreduce.input.fileinputformat.split.maxsize. (Zhihai Xu via kasha)


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/ebec913e
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/ebec913e
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/ebec913e

Branch: refs/heads/branch-1
Commit: ebec913e20d0081e2e30e13817e9023565e0b920
Parents: 689ca40
Author: Karthik Kambatla <ka...@apache.org>
Authored: Mon Nov 3 15:55:22 2014 -0800
Committer: Karthik Kambatla <ka...@apache.org>
Committed: Mon Nov 3 15:55:22 2014 -0800

----------------------------------------------------------------------
 CHANGES.txt                                     |  3 +++
 .../mapred/lib/CombineFileInputFormat.java      | 20 ++++++++++++++++--
 .../mapred/lib/TestCombineFileInputFormat.java  | 22 ++++++++++++++++++++
 3 files changed, 43 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/ebec913e/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 904bbd5..a695be2 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -61,6 +61,9 @@ Release 1.3.0 - unreleased
     HADOOP-10614. CBZip2InputStream is not threadsafe (Xiangrui Meng via Sandy
     Ryza)
 
+    MAPREDUCE-6147. Support mapreduce.input.fileinputformat.split.maxsize.
+    (Zhihai Xu via kasha)
+
   BUG FIXES
 
     MAPREDUCE-4490. Fixed LinuxTaskController to re-initialize user log

http://git-wip-us.apache.org/repos/asf/hadoop/blob/ebec913e/src/mapred/org/apache/hadoop/mapred/lib/CombineFileInputFormat.java
----------------------------------------------------------------------
diff --git a/src/mapred/org/apache/hadoop/mapred/lib/CombineFileInputFormat.java b/src/mapred/org/apache/hadoop/mapred/lib/CombineFileInputFormat.java
index 825100f..4977a35 100644
--- a/src/mapred/org/apache/hadoop/mapred/lib/CombineFileInputFormat.java
+++ b/src/mapred/org/apache/hadoop/mapred/lib/CombineFileInputFormat.java
@@ -141,7 +141,23 @@ public abstract class CombineFileInputFormat<K, V>
     }
     return codec instanceof SplittableCompressionCodec;
   }
-  
+
+  /**
+   * Looks up the configured maximum split size for a job.
+   * "mapred.max.split.size" wins when it holds anything other than -1;
+   * otherwise "mapreduce.input.fileinputformat.split.maxsize" is used.
+   * @param job the job configuration to read both keys from
+   * @return the configured maximum split size, or 0 if neither key is set
+   */
+  long getConfiguredMaxSplitSize(JobConf job) {
+    final long unset = -1L;
+    final long oldKeySize = job.getLong("mapred.max.split.size", unset);
+    if (oldKeySize != unset) {
+      return oldKeySize;
+    }
+    return job.getLong("mapreduce.input.fileinputformat.split.maxsize", 0);
+  }
+
   /**
    * default constructor
    */
@@ -171,7 +187,7 @@ public abstract class CombineFileInputFormat<K, V>
     if (maxSplitSize != 0) {
       maxSize = maxSplitSize;
     } else {
-      maxSize = job.getLong("mapred.max.split.size", 0);
+      maxSize = getConfiguredMaxSplitSize(job);
     }
     if (minSizeNode != 0 && maxSize != 0 && minSizeNode > maxSize) {
       throw new IOException("Minimum split size pernode " + minSizeNode +

http://git-wip-us.apache.org/repos/asf/hadoop/blob/ebec913e/src/test/org/apache/hadoop/mapred/lib/TestCombineFileInputFormat.java
----------------------------------------------------------------------
diff --git a/src/test/org/apache/hadoop/mapred/lib/TestCombineFileInputFormat.java b/src/test/org/apache/hadoop/mapred/lib/TestCombineFileInputFormat.java
index 0d9e766..4ef9902 100644
--- a/src/test/org/apache/hadoop/mapred/lib/TestCombineFileInputFormat.java
+++ b/src/test/org/apache/hadoop/mapred/lib/TestCombineFileInputFormat.java
@@ -898,6 +898,28 @@ public class TestCombineFileInputFormat extends TestCase{
     }
   }
 
+  public void testGetConfiguredMaxSplitSize() throws Throwable {
+    JobConf conf = new JobConf();
+    DummyInputFormat inFormat = new DummyInputFormat();
+    // Checks key precedence. With neither mapred.max.split.size nor
+    // mapreduce.input.fileinputformat.split.maxsize set, expect 0.
+    long maxSize = inFormat.getConfiguredMaxSplitSize(conf);
+    assertEquals(0L, maxSize);
+
+    // if only mapreduce.input.fileinputformat.split.maxsize is set,
+    // return the value of mapreduce.input.fileinputformat.split.maxsize.
+    conf.setLong("mapreduce.input.fileinputformat.split.maxsize", 100L);
+    maxSize = inFormat.getConfiguredMaxSplitSize(conf);
+    assertEquals(100L, maxSize);
+
+    // if both mapred.max.split.size and
+    // mapreduce.input.fileinputformat.split.maxsize are set,
+    // return the value of mapred.max.split.size.
+    conf.setLong("mapred.max.split.size", 1000L);
+    maxSize = inFormat.getConfiguredMaxSplitSize(conf);
+    assertEquals(1000L, maxSize);
+  }
+
   static class TestFilter implements PathFilter {
     private Path p;