You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by sh...@apache.org on 2017/12/08 01:05:44 UTC

[1/4] hadoop git commit: MAPREDUCE-6165. [JDK8] TestCombineFileInputFormat failed on JDK8. Contributed by Akira AJISAKA.

Repository: hadoop
Updated Branches:
  refs/heads/branch-2.7 fbb44d796 -> 261f8abce
  refs/heads/branch-2.7.5 f124ab610 -> 2c5c89fa4


MAPREDUCE-6165. [JDK8] TestCombineFileInputFormat failed on JDK8. Contributed by Akira AJISAKA.

(cherry picked from commit 551615fa13f65ae996bae9c1bacff189539b6557)
(cherry picked from commit 4e96175b334925466a9790e6ca20d0ec7d350791)


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/922728db
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/922728db
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/922728db

Branch: refs/heads/branch-2.7.5
Commit: 922728db1acd1dd33e4252f1f72d98b0f8a9cd4a
Parents: f124ab6
Author: Tsuyoshi Ozawa <oz...@apache.org>
Authored: Tue May 5 10:23:13 2015 +0900
Committer: Konstantin V Shvachko <sh...@apache.org>
Committed: Thu Dec 7 13:29:28 2017 -0800

----------------------------------------------------------------------
 hadoop-mapreduce-project/CHANGES.txt            |    3 +
 .../lib/input/CombineFileInputFormat.java       |   26 +-
 .../lib/input/TestCombineFileInputFormat.java   | 1138 ++++++++++++------
 3 files changed, 805 insertions(+), 362 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/922728db/hadoop-mapreduce-project/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt
index cbff832..d073d7c 100644
--- a/hadoop-mapreduce-project/CHANGES.txt
+++ b/hadoop-mapreduce-project/CHANGES.txt
@@ -21,6 +21,9 @@ Release 2.7.5 - 2017-12-01
     MAPREDUCE-6957. shuffle hangs after a node manager connection timeout.
     (Jooseong Kim via jlowe)
 
+    MAPREDUCE-6165. [JDK8] TestCombineFileInputFormat failed on JDK8.
+    Contributed by Akira AJISAKA.
+
 Release 2.7.4 - 2017-08-04
 
   INCOMPATIBLE CHANGES

http://git-wip-us.apache.org/repos/asf/hadoop/blob/922728db/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/CombineFileInputFormat.java
----------------------------------------------------------------------
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/CombineFileInputFormat.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/CombineFileInputFormat.java
index 040c54b..b2b7656 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/CombineFileInputFormat.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/CombineFileInputFormat.java
@@ -29,7 +29,6 @@ import java.util.HashMap;
 import java.util.Set;
 import java.util.Iterator;
 import java.util.Map;
-import java.util.Map.Entry;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -289,6 +288,26 @@ public abstract class CombineFileInputFormat<K, V>
                  maxSize, minSizeNode, minSizeRack, splits);
   }
 
+  /**
+   * Process all the nodes and create splits that are local to a node.
+   * Generate one split per node iteration, and walk over nodes multiple times
+   * to distribute the splits across nodes.
+   * <p>
+   * Note: The order of processing the nodes is unspecified because
+   * nodeToBlocks is implemented as a {@link java.util.HashMap}, whose
+   * iteration order is not guaranteed.
+   * @param nodeToBlocks Mapping from a node to the list of blocks that
+   *                     it contains.
+   * @param blockToNodes Mapping from a block to the nodes on which
+   *                     it has replicas.
+   * @param rackToBlocks Mapping from a rack name to the list of blocks it has.
+   * @param totLength Total length of the input files.
+   * @param maxSize Max size of each split.
+   *                If set to 0, disable smoothing load.
+   * @param minSizeNode Minimum split size per node.
+   * @param minSizeRack Minimum split size per rack.
+   * @param splits New splits created by this method are added to the list.
+   */
   @VisibleForTesting
   void createSplits(Map<String, Set<OneBlockInfo>> nodeToBlocks,
                      Map<OneBlockInfo, String[]> blockToNodes,
@@ -309,11 +328,6 @@ public abstract class CombineFileInputFormat<K, V>
     Set<String> completedNodes = new HashSet<String>();
     
     while(true) {
-      // it is allowed for maxSize to be 0. Disable smoothing load for such cases
-
-      // process all nodes and create splits that are local to a node. Generate
-      // one split per node iteration, and walk over nodes multiple times to
-      // distribute the splits across nodes. 
       for (Iterator<Map.Entry<String, Set<OneBlockInfo>>> iter = nodeToBlocks
           .entrySet().iterator(); iter.hasNext();) {
         Map.Entry<String, Set<OneBlockInfo>> one = iter.next();

http://git-wip-us.apache.org/repos/asf/hadoop/blob/922728db/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/lib/input/TestCombineFileInputFormat.java
----------------------------------------------------------------------
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/lib/input/TestCombineFileInputFormat.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/lib/input/TestCombineFileInputFormat.java
index 85c675c..b49f2d8 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/lib/input/TestCombineFileInputFormat.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/lib/input/TestCombineFileInputFormat.java
@@ -22,6 +22,7 @@ import java.io.OutputStream;
 import java.net.URI;
 import java.util.ArrayList;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
@@ -53,13 +54,22 @@ import org.apache.hadoop.mapreduce.TaskType;
 import org.apache.hadoop.mapreduce.lib.input.CombineFileInputFormat.OneBlockInfo;
 import org.apache.hadoop.mapreduce.lib.input.CombineFileInputFormat.OneFileInfo;
 import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;
+
+import org.junit.Before;
 import org.junit.Test;
+import org.mockito.Mock;
+import org.mockito.MockitoAnnotations;
 
 import com.google.common.collect.HashMultiset;
 
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+import static org.mockito.Mockito.atLeastOnce;
+import static org.mockito.Mockito.times;
+import static org.mockito.Mockito.reset;
+import static org.mockito.Mockito.verify;
 
 public class TestCombineFileInputFormat {
 
@@ -92,6 +102,14 @@ public class TestCombineFileInputFormat {
   static final int BLOCKSIZE = 1024;
   static final byte[] databuf = new byte[BLOCKSIZE];
 
+  @Mock
+  private List<String> mockList;
+
+  @Before
+  public void initMocks() {
+    MockitoAnnotations.initMocks(this); // sets up @Mock fields (mockList)
+  }
+
   private static final String DUMMY_FS_URI = "dummyfs:///";
 
   /** Dummy class to extend CombineFileInputFormat*/
@@ -299,7 +317,51 @@ public class TestCombineFileInputFormat {
     assertFalse(rr.nextKeyValue());
   }
 
+  /**
+   * Records the name, length, and offset of one entry of a split to compare.
+   */
+  private final class Split {
+    private String name;
+    private long length;
+    private long offset;
+
+    public Split(String name, long length, long offset) {
+      this.name = name;
+      this.length = length;
+      this.offset = offset;
+    }
+
+    public String getName() {
+      return name;
+    }
+
+    public long getLength() {
+      return length;
+    }
+
+    public long getOffset() {
+      return offset;
+    }
+
+    @Override // NOTE(review): no matching hashCode() override in this class
+    public boolean equals(Object obj) {
+      if (obj instanceof Split) {
+        Split split = ((Split) obj);
+        // Equal only when name, length, and offset all match.
+        return split.name.equals(name) && split.length == length
+            && split.offset == offset;
+      }
+      return false; // any non-Split object (including null) is unequal
+    }
+  }
+
+  /**
+   * The test suppresses unchecked warnings in
+   * {@link org.mockito.Mockito#reset}. Although calling the method is
+   * bad practice, we call the method instead of splitting the test
+   * (i.e. restarting MiniDFSCluster) to save time.
+   */
   @Test
+  @SuppressWarnings("unchecked")
   public void testSplitPlacement() throws Exception {
     MiniDFSCluster dfs = null;
     FileSystem fileSys = null;
@@ -326,10 +388,10 @@ public class TestCombineFileInputFormat {
         throw new IOException("Mkdirs failed to create " + inDir.toString());
       }
       Path file1 = new Path(dir1 + "/file1");
-      writeFile(conf, file1, (short)1, 1);
+      writeFile(conf, file1, (short) 1, 1);
       // create another file on the same datanode
       Path file5 = new Path(dir5 + "/file5");
-      writeFile(conf, file5, (short)1, 1);
+      writeFile(conf, file5, (short) 1, 1);
       // split it using a CombinedFile input format
       DummyInputFormat inFormat = new DummyInputFormat();
       Job job = Job.getInstance(conf);
@@ -350,13 +412,13 @@ public class TestCombineFileInputFormat {
       assertEquals(0, fileSplit.getOffset(1));
       assertEquals(BLOCKSIZE, fileSplit.getLength(1));
       assertEquals(hosts1[0], fileSplit.getLocations()[0]);
-      
+
       dfs.startDataNodes(conf, 1, true, null, rack2, hosts2, null);
       dfs.waitActive();
 
       // create file on two datanodes.
       Path file2 = new Path(dir2 + "/file2");
-      writeFile(conf, file2, (short)2, 2);
+      writeFile(conf, file2, (short) 2, 2);
 
       // split it using a CombinedFile input format
       inFormat = new DummyInputFormat();
@@ -365,34 +427,67 @@ public class TestCombineFileInputFormat {
       splits = inFormat.getSplits(job);
       System.out.println("Made splits(Test1): " + splits.size());
 
-      // make sure that each split has different locations
       for (InputSplit split : splits) {
         System.out.println("File split(Test1): " + split);
       }
-      assertEquals(2, splits.size());
-      fileSplit = (CombineFileSplit) splits.get(0);
-      assertEquals(2, fileSplit.getNumPaths());
-      assertEquals(1, fileSplit.getLocations().length);
-      assertEquals(file2.getName(), fileSplit.getPath(0).getName());
-      assertEquals(0, fileSplit.getOffset(0));
-      assertEquals(BLOCKSIZE, fileSplit.getLength(0));
-      assertEquals(file2.getName(), fileSplit.getPath(1).getName());
-      assertEquals(BLOCKSIZE, fileSplit.getOffset(1));
-      assertEquals(BLOCKSIZE, fileSplit.getLength(1));
-      assertEquals(hosts2[0], fileSplit.getLocations()[0]); // should be on r2
-      fileSplit = (CombineFileSplit) splits.get(1);
-      assertEquals(1, fileSplit.getNumPaths());
-      assertEquals(1, fileSplit.getLocations().length);
-      assertEquals(file1.getName(), fileSplit.getPath(0).getName());
-      assertEquals(0, fileSplit.getOffset(0));
-      assertEquals(BLOCKSIZE, fileSplit.getLength(0));
-      assertEquals(hosts1[0], fileSplit.getLocations()[0]); // should be on r1
+
+      for (InputSplit split : splits) {
+        fileSplit = (CombineFileSplit) split;
+        /**
+         * If rack1 is processed first by
+         * {@link CombineFileInputFormat#createSplits},
+         * create only one split on rack1. Otherwise create two splits.
+         */
+        if (splits.size() == 2) {
+          // first split is on rack2, contains file2
+          if (split.equals(splits.get(0))) {
+            assertEquals(2, fileSplit.getNumPaths());
+            assertEquals(1, fileSplit.getLocations().length);
+            assertEquals(file2.getName(), fileSplit.getPath(0).getName());
+            assertEquals(0, fileSplit.getOffset(0));
+            assertEquals(BLOCKSIZE, fileSplit.getLength(0));
+            assertEquals(file2.getName(), fileSplit.getPath(1).getName());
+            assertEquals(BLOCKSIZE, fileSplit.getOffset(1));
+            assertEquals(BLOCKSIZE, fileSplit.getLength(1));
+            assertEquals(hosts2[0], fileSplit.getLocations()[0]);
+          }
+          // second split is on rack1, contains file1
+          if (split.equals(splits.get(1))) {
+            assertEquals(1, fileSplit.getNumPaths());
+            assertEquals(1, fileSplit.getLocations().length);
+            assertEquals(file1.getName(), fileSplit.getPath(0).getName());
+            assertEquals(0, fileSplit.getOffset(0));
+            assertEquals(BLOCKSIZE, fileSplit.getLength(0));
+            assertEquals(hosts1[0], fileSplit.getLocations()[0]);
+          }
+        } else if (splits.size() == 1) {
+          // first split is on rack1, contains file1 and file2.
+          assertEquals(3, fileSplit.getNumPaths());
+          Set<Split> expected = new HashSet<>();
+          expected.add(new Split(file1.getName(), BLOCKSIZE, 0));
+          expected.add(new Split(file2.getName(), BLOCKSIZE, 0));
+          expected.add(new Split(file2.getName(), BLOCKSIZE, BLOCKSIZE));
+          List<Split> actual = new ArrayList<>();
+          for (int i = 0; i < 3; i++) {
+            String name = fileSplit.getPath(i).getName();
+            long length = fileSplit.getLength(i);
+            long offset = fileSplit.getOffset(i);
+            actual.add(new Split(name, length, offset));
+          }
+          assertTrue(actual.containsAll(expected));
+          assertEquals(1, fileSplit.getLocations().length);
+          assertEquals(hosts1[0], fileSplit.getLocations()[0]);
+        } else {
+          fail("Expected split size is 1 or 2, but actual size is "
+              + splits.size());
+        }
+      }
 
       // create another file on 3 datanodes and 3 racks.
       dfs.startDataNodes(conf, 1, true, null, rack3, hosts3, null);
       dfs.waitActive();
       Path file3 = new Path(dir3 + "/file3");
-      writeFile(conf, new Path(dir3 + "/file3"), (short)3, 3);
+      writeFile(conf, new Path(dir3 + "/file3"), (short) 3, 3);
       inFormat = new DummyInputFormat();
       FileInputFormat.setInputPaths(job, dir1 + "," + dir2 + "," + dir3);
       inFormat.setMinSplitSizeRack(BLOCKSIZE);
@@ -400,37 +495,98 @@ public class TestCombineFileInputFormat {
       for (InputSplit split : splits) {
         System.out.println("File split(Test2): " + split);
       }
-      assertEquals(3, splits.size());
-      fileSplit = (CombineFileSplit) splits.get(0);
-      assertEquals(3, fileSplit.getNumPaths());
-      assertEquals(1, fileSplit.getLocations().length);
-      assertEquals(file3.getName(), fileSplit.getPath(0).getName());
-      assertEquals(0, fileSplit.getOffset(0));
-      assertEquals(BLOCKSIZE, fileSplit.getLength(0));
-      assertEquals(file3.getName(), fileSplit.getPath(1).getName());
-      assertEquals(BLOCKSIZE, fileSplit.getOffset(1));
-      assertEquals(BLOCKSIZE, fileSplit.getLength(1));
-      assertEquals(file3.getName(), fileSplit.getPath(2).getName());
-      assertEquals(2 * BLOCKSIZE, fileSplit.getOffset(2));
-      assertEquals(BLOCKSIZE, fileSplit.getLength(2));
-      assertEquals(hosts3[0], fileSplit.getLocations()[0]); // should be on r3
-      fileSplit = (CombineFileSplit) splits.get(1);
-      assertEquals(2, fileSplit.getNumPaths());
-      assertEquals(1, fileSplit.getLocations().length);
-      assertEquals(file2.getName(), fileSplit.getPath(0).getName());
-      assertEquals(0, fileSplit.getOffset(0));
-      assertEquals(BLOCKSIZE, fileSplit.getLength(0));
-      assertEquals(file2.getName(), fileSplit.getPath(1).getName());
-      assertEquals(BLOCKSIZE, fileSplit.getOffset(1));
-      assertEquals(BLOCKSIZE, fileSplit.getLength(1));
-      assertEquals(hosts2[0], fileSplit.getLocations()[0]); // should be on r2
-      fileSplit = (CombineFileSplit) splits.get(2);
-      assertEquals(1, fileSplit.getNumPaths());
-      assertEquals(1, fileSplit.getLocations().length);
-      assertEquals(file1.getName(), fileSplit.getPath(0).getName());
-      assertEquals(0, fileSplit.getOffset(0));
-      assertEquals(BLOCKSIZE, fileSplit.getLength(0));
-      assertEquals(hosts1[0], fileSplit.getLocations()[0]); // should be on r1
+
+      Set<Split> expected = new HashSet<>();
+      expected.add(new Split(file1.getName(), BLOCKSIZE, 0));
+      expected.add(new Split(file2.getName(), BLOCKSIZE, 0));
+      expected.add(new Split(file2.getName(), BLOCKSIZE, BLOCKSIZE));
+      expected.add(new Split(file3.getName(), BLOCKSIZE, 0));
+      expected.add(new Split(file3.getName(), BLOCKSIZE, BLOCKSIZE));
+      expected.add(new Split(file3.getName(), BLOCKSIZE, BLOCKSIZE * 2));
+      List<Split> actual = new ArrayList<>();
+
+      for (InputSplit split : splits) {
+        fileSplit = (CombineFileSplit) split;
+        /**
+         * If rack1 is processed first by
+         * {@link CombineFileInputFormat#createSplits},
+         * create only one split on rack1.
+         * If rack2 or rack3 is processed first and rack1 is processed second,
+         * create one split on rack2 or rack3 and the other split is on rack1.
+         * Otherwise create 3 splits for each rack.
+         */
+        if (splits.size() == 3) {
+          // first split is on rack3, contains file3
+          if (split.equals(splits.get(0))) {
+            assertEquals(3, fileSplit.getNumPaths());
+            assertEquals(1, fileSplit.getLocations().length);
+            assertEquals(file3.getName(), fileSplit.getPath(0).getName());
+            assertEquals(0, fileSplit.getOffset(0));
+            assertEquals(BLOCKSIZE, fileSplit.getLength(0));
+            assertEquals(file3.getName(), fileSplit.getPath(1).getName());
+            assertEquals(BLOCKSIZE, fileSplit.getOffset(1));
+            assertEquals(BLOCKSIZE, fileSplit.getLength(1));
+            assertEquals(file3.getName(), fileSplit.getPath(2).getName());
+            assertEquals(2 * BLOCKSIZE, fileSplit.getOffset(2));
+            assertEquals(BLOCKSIZE, fileSplit.getLength(2));
+            assertEquals(hosts3[0], fileSplit.getLocations()[0]);
+          }
+          // second split is on rack2, contains file2
+          if (split.equals(splits.get(1))) {
+            assertEquals(2, fileSplit.getNumPaths());
+            assertEquals(1, fileSplit.getLocations().length);
+            assertEquals(file2.getName(), fileSplit.getPath(0).getName());
+            assertEquals(0, fileSplit.getOffset(0));
+            assertEquals(BLOCKSIZE, fileSplit.getLength(0));
+            assertEquals(file2.getName(), fileSplit.getPath(1).getName());
+            assertEquals(BLOCKSIZE, fileSplit.getOffset(1));
+            assertEquals(BLOCKSIZE, fileSplit.getLength(1));
+            assertEquals(hosts2[0], fileSplit.getLocations()[0]);
+          }
+          // third split is on rack1, contains file1
+          if (split.equals(splits.get(2))) {
+            assertEquals(1, fileSplit.getNumPaths());
+            assertEquals(1, fileSplit.getLocations().length);
+            assertEquals(file1.getName(), fileSplit.getPath(0).getName());
+            assertEquals(0, fileSplit.getOffset(0));
+            assertEquals(BLOCKSIZE, fileSplit.getLength(0));
+            assertEquals(hosts1[0], fileSplit.getLocations()[0]);
+          }
+        } else if (splits.size() == 2) {
+          // first split is on rack2 or rack3, contains one or two files.
+          if (split.equals(splits.get(0))) {
+            assertEquals(1, fileSplit.getLocations().length);
+            if (fileSplit.getLocations()[0].equals(hosts2[0])) {
+              assertEquals(2, fileSplit.getNumPaths());
+            } else if (fileSplit.getLocations()[0].equals(hosts3[0])) {
+              assertEquals(3, fileSplit.getNumPaths());
+            } else {
+              fail("First split should be on rack2 or rack3.");
+            }
+          }
+          // second split is on rack1, contains the remaining files.
+          if (split.equals(splits.get(1))) {
+            assertEquals(1, fileSplit.getLocations().length);
+            assertEquals(hosts1[0], fileSplit.getLocations()[0]);
+          }
+        } else if (splits.size() == 1) {
+          // first split is on rack1, contains all three files.
+          assertEquals(1, fileSplit.getLocations().length);
+          assertEquals(6, fileSplit.getNumPaths());
+          assertEquals(hosts1[0], fileSplit.getLocations()[0]);
+        } else {
+          fail("Split size should be 1, 2, or 3.");
+        }
+        for (int i = 0; i < fileSplit.getNumPaths(); i++) {
+          String name = fileSplit.getPath(i).getName();
+          long length = fileSplit.getLength(i);
+          long offset = fileSplit.getOffset(i);
+          actual.add(new Split(name, length, offset));
+        }
+      }
+
+      assertEquals(6, actual.size());
+      assertTrue(actual.containsAll(expected));
 
       // create file4 on all three racks
       Path file4 = new Path(dir4 + "/file4");
@@ -442,37 +598,85 @@ public class TestCombineFileInputFormat {
       for (InputSplit split : splits) {
         System.out.println("File split(Test3): " + split);
       }
-      assertEquals(3, splits.size());
-      fileSplit = (CombineFileSplit) splits.get(0);
-      assertEquals(6, fileSplit.getNumPaths());
-      assertEquals(1, fileSplit.getLocations().length);
-      assertEquals(file3.getName(), fileSplit.getPath(0).getName());
-      assertEquals(0, fileSplit.getOffset(0));
-      assertEquals(BLOCKSIZE, fileSplit.getLength(0));
-      assertEquals(file3.getName(), fileSplit.getPath(1).getName());
-      assertEquals(BLOCKSIZE, fileSplit.getOffset(1));
-      assertEquals(BLOCKSIZE, fileSplit.getLength(1));
-      assertEquals(file3.getName(), fileSplit.getPath(2).getName());
-      assertEquals(2 * BLOCKSIZE, fileSplit.getOffset(2));
-      assertEquals(BLOCKSIZE, fileSplit.getLength(2));
-      assertEquals(hosts3[0], fileSplit.getLocations()[0]); // should be on r3
-      fileSplit = (CombineFileSplit) splits.get(1);
-      assertEquals(2, fileSplit.getNumPaths());
-      assertEquals(1, fileSplit.getLocations().length);
-      assertEquals(file2.getName(), fileSplit.getPath(0).getName());
-      assertEquals(0, fileSplit.getOffset(0));
-      assertEquals(BLOCKSIZE, fileSplit.getLength(0));
-      assertEquals(file2.getName(), fileSplit.getPath(1).getName());
-      assertEquals(BLOCKSIZE, fileSplit.getOffset(1));
-      assertEquals(BLOCKSIZE, fileSplit.getLength(1));
-      assertEquals(hosts2[0], fileSplit.getLocations()[0]); // should be on r2
-      fileSplit = (CombineFileSplit) splits.get(2);
-      assertEquals(1, fileSplit.getNumPaths());
-      assertEquals(1, fileSplit.getLocations().length);
-      assertEquals(file1.getName(), fileSplit.getPath(0).getName());
-      assertEquals(0, fileSplit.getOffset(0));
-      assertEquals(BLOCKSIZE, fileSplit.getLength(0));
-      assertEquals(hosts1[0], fileSplit.getLocations()[0]); // should be on r1
+
+      expected.add(new Split(file4.getName(), BLOCKSIZE, 0));
+      expected.add(new Split(file4.getName(), BLOCKSIZE, BLOCKSIZE));
+      expected.add(new Split(file4.getName(), BLOCKSIZE, BLOCKSIZE * 2));
+      actual.clear();
+
+      for (InputSplit split : splits) {
+        fileSplit = (CombineFileSplit) split;
+        /**
+         * If rack1 is processed first by
+         * {@link CombineFileInputFormat#createSplits},
+         * create only one split on rack1.
+         * If rack2 or rack3 is processed first and rack1 is processed second,
+         * create one split on rack2 or rack3 and the other split is on rack1.
+         * Otherwise create 3 splits for each rack.
+         */
+        if (splits.size() == 3) {
+          // first split is on rack3, contains file3 and file4
+          if (split.equals(splits.get(0))) {
+            assertEquals(6, fileSplit.getNumPaths());
+            assertEquals(1, fileSplit.getLocations().length);
+            assertEquals(hosts3[0], fileSplit.getLocations()[0]);
+          }
+          // second split is on rack2, contains file2
+          if (split.equals(splits.get(1))) {
+            assertEquals(2, fileSplit.getNumPaths());
+            assertEquals(1, fileSplit.getLocations().length);
+            assertEquals(file2.getName(), fileSplit.getPath(0).getName());
+            assertEquals(0, fileSplit.getOffset(0));
+            assertEquals(BLOCKSIZE, fileSplit.getLength(0));
+            assertEquals(file2.getName(), fileSplit.getPath(1).getName());
+            assertEquals(BLOCKSIZE, fileSplit.getOffset(1));
+            assertEquals(BLOCKSIZE, fileSplit.getLength(1));
+            assertEquals(hosts2[0], fileSplit.getLocations()[0]);
+          }
+          // third split is on rack1, contains file1
+          if (split.equals(splits.get(2))) {
+            assertEquals(1, fileSplit.getNumPaths());
+            assertEquals(1, fileSplit.getLocations().length);
+            assertEquals(file1.getName(), fileSplit.getPath(0).getName());
+            assertEquals(0, fileSplit.getOffset(0));
+            assertEquals(BLOCKSIZE, fileSplit.getLength(0));
+            assertEquals(hosts1[0], fileSplit.getLocations()[0]);
+          }
+        } else if (splits.size() == 2) {
+          // first split is on rack2 or rack3, contains two or three files.
+          if (split.equals(splits.get(0))) {
+            assertEquals(1, fileSplit.getLocations().length);
+            if (fileSplit.getLocations()[0].equals(hosts2[0])) {
+              assertEquals(5, fileSplit.getNumPaths());
+            } else if (fileSplit.getLocations()[0].equals(hosts3[0])) {
+              assertEquals(6, fileSplit.getNumPaths());
+            } else {
+              fail("First split should be on rack2 or rack3.");
+            }
+          }
+          // second split is on rack1, contains the remaining files.
+          if (split.equals(splits.get(1))) {
+            assertEquals(1, fileSplit.getLocations().length);
+            assertEquals(hosts1[0], fileSplit.getLocations()[0]);
+          }
+        } else if (splits.size() == 1) {
+          // first split is on rack1, contains all four files.
+          assertEquals(1, fileSplit.getLocations().length);
+          assertEquals(9, fileSplit.getNumPaths());
+          assertEquals(hosts1[0], fileSplit.getLocations()[0]);
+        } else {
+          fail("Split size should be 1, 2, or 3.");
+        }
+        for (int i = 0; i < fileSplit.getNumPaths(); i++) {
+          String name = fileSplit.getPath(i).getName();
+          long length = fileSplit.getLength(i);
+          long offset = fileSplit.getOffset(i);
+          actual.add(new Split(name, length, offset));
+        }
+      }
+
+      assertEquals(9, actual.size());
+      assertTrue(actual.containsAll(expected));
 
       // maximum split size is 2 blocks 
       inFormat = new DummyInputFormat();
@@ -485,34 +689,26 @@ public class TestCombineFileInputFormat {
         System.out.println("File split(Test4): " + split);
       }
       assertEquals(5, splits.size());
-      fileSplit = (CombineFileSplit) splits.get(0);
-      assertEquals(2, fileSplit.getNumPaths());
-      assertEquals(1, fileSplit.getLocations().length);
-      assertEquals(file3.getName(), fileSplit.getPath(0).getName());
-      assertEquals(0, fileSplit.getOffset(0));
-      assertEquals(BLOCKSIZE, fileSplit.getLength(0));
-      assertEquals(file3.getName(), fileSplit.getPath(1).getName());
-      assertEquals(BLOCKSIZE, fileSplit.getOffset(1));
-      assertEquals(BLOCKSIZE, fileSplit.getLength(1));
-      assertEquals("host3.rack3.com", fileSplit.getLocations()[0]);
-      fileSplit = (CombineFileSplit) splits.get(1);
-      assertEquals(file2.getName(), fileSplit.getPath(0).getName());
-      assertEquals(0, fileSplit.getOffset(0));
-      assertEquals(BLOCKSIZE, fileSplit.getLength(0));
-      assertEquals(file2.getName(), fileSplit.getPath(1).getName());
-      assertEquals(BLOCKSIZE, fileSplit.getOffset(1));
-      assertEquals(BLOCKSIZE, fileSplit.getLength(1));
-      assertEquals("host2.rack2.com", fileSplit.getLocations()[0]);
-      fileSplit = (CombineFileSplit) splits.get(2);
-      assertEquals(2, fileSplit.getNumPaths());
-      assertEquals(1, fileSplit.getLocations().length);
-      assertEquals(file1.getName(), fileSplit.getPath(0).getName());
-      assertEquals(0, fileSplit.getOffset(0));
-      assertEquals(BLOCKSIZE, fileSplit.getLength(0));
-      assertEquals(file3.getName(), fileSplit.getPath(1).getName());
-      assertEquals(2 * BLOCKSIZE, fileSplit.getOffset(1));
-      assertEquals(BLOCKSIZE, fileSplit.getLength(1));
-      assertEquals("host1.rack1.com", fileSplit.getLocations()[0]);
+
+      actual.clear();
+      reset(mockList);
+      for (InputSplit split : splits) {
+        fileSplit = (CombineFileSplit) split;
+        for (int i = 0; i < fileSplit.getNumPaths(); i++) {
+          String name = fileSplit.getPath(i).getName();
+          long length = fileSplit.getLength(i);
+          long offset = fileSplit.getOffset(i);
+          actual.add(new Split(name, length, offset));
+        }
+        mockList.add(fileSplit.getLocations()[0]);
+      }
+
+      assertEquals(9, actual.size());
+      assertTrue(actual.containsAll(expected));
+      // verify the splits are on all the racks
+      verify(mockList, atLeastOnce()).add(hosts1[0]);
+      verify(mockList, atLeastOnce()).add(hosts2[0]);
+      verify(mockList, atLeastOnce()).add(hosts3[0]);
 
       // maximum split size is 3 blocks 
       inFormat = new DummyInputFormat();
@@ -524,44 +720,26 @@ public class TestCombineFileInputFormat {
       for (InputSplit split : splits) {
         System.out.println("File split(Test5): " + split);
       }
+
       assertEquals(3, splits.size());
-      fileSplit = (CombineFileSplit) splits.get(0);
-      assertEquals(3, fileSplit.getNumPaths());
-      assertEquals(1, fileSplit.getLocations().length);
-      assertEquals(file3.getName(), fileSplit.getPath(0).getName());
-      assertEquals(0, fileSplit.getOffset(0));
-      assertEquals(BLOCKSIZE, fileSplit.getLength(0));
-      assertEquals(file3.getName(), fileSplit.getPath(1).getName());
-      assertEquals(BLOCKSIZE, fileSplit.getOffset(1));
-      assertEquals(BLOCKSIZE, fileSplit.getLength(1));
-      assertEquals(file3.getName(), fileSplit.getPath(2).getName());
-      assertEquals(2 * BLOCKSIZE, fileSplit.getOffset(2));
-      assertEquals(BLOCKSIZE, fileSplit.getLength(2));
-      assertEquals("host3.rack3.com", fileSplit.getLocations()[0]);
-      fileSplit = (CombineFileSplit) splits.get(1);
-      assertEquals(file2.getName(), fileSplit.getPath(0).getName());
-      assertEquals(0, fileSplit.getOffset(0));
-      assertEquals(BLOCKSIZE, fileSplit.getLength(0));
-      assertEquals(file2.getName(), fileSplit.getPath(1).getName());
-      assertEquals(BLOCKSIZE, fileSplit.getOffset(1));
-      assertEquals(BLOCKSIZE, fileSplit.getLength(1));
-      assertEquals(file4.getName(), fileSplit.getPath(2).getName());
-      assertEquals(0, fileSplit.getOffset(2));
-      assertEquals(BLOCKSIZE, fileSplit.getLength(2));
-      assertEquals("host2.rack2.com", fileSplit.getLocations()[0]);
-      fileSplit = (CombineFileSplit) splits.get(2);
-      assertEquals(3, fileSplit.getNumPaths());
-      assertEquals(1, fileSplit.getLocations().length);
-      assertEquals(file1.getName(), fileSplit.getPath(0).getName());
-      assertEquals(0, fileSplit.getOffset(0));
-      assertEquals(BLOCKSIZE, fileSplit.getLength(0));
-      assertEquals(file4.getName(), fileSplit.getPath(1).getName());
-      assertEquals(BLOCKSIZE, fileSplit.getOffset(1));
-      assertEquals(BLOCKSIZE, fileSplit.getLength(1));
-      assertEquals(file4.getName(), fileSplit.getPath(2).getName());
-      assertEquals(2*BLOCKSIZE, fileSplit.getOffset(2));
-      assertEquals(BLOCKSIZE, fileSplit.getLength(2));
-      assertEquals("host1.rack1.com", fileSplit.getLocations()[0]);
+
+      actual.clear();
+      reset(mockList);
+      for (InputSplit split : splits) {
+        fileSplit = (CombineFileSplit) split;
+        for (int i = 0; i < fileSplit.getNumPaths(); i++) {
+          String name = fileSplit.getPath(i).getName();
+          long length = fileSplit.getLength(i);
+          long offset = fileSplit.getOffset(i);
+          actual.add(new Split(name, length, offset));
+        }
+        mockList.add(fileSplit.getLocations()[0]);
+      }
+
+      assertEquals(9, actual.size());
+      assertTrue(actual.containsAll(expected));
+      verify(mockList, atLeastOnce()).add(hosts1[0]);
+      verify(mockList, atLeastOnce()).add(hosts2[0]);
 
       // maximum split size is 4 blocks 
       inFormat = new DummyInputFormat();
@@ -572,41 +750,23 @@ public class TestCombineFileInputFormat {
         System.out.println("File split(Test6): " + split);
       }
       assertEquals(3, splits.size());
-      fileSplit = (CombineFileSplit) splits.get(0);
-      assertEquals(4, fileSplit.getNumPaths());
-      assertEquals(1, fileSplit.getLocations().length);
-      assertEquals(file3.getName(), fileSplit.getPath(0).getName());
-      assertEquals(0, fileSplit.getOffset(0));
-      assertEquals(BLOCKSIZE, fileSplit.getLength(0));
-      assertEquals(file3.getName(), fileSplit.getPath(1).getName());
-      assertEquals(BLOCKSIZE, fileSplit.getOffset(1));
-      assertEquals(BLOCKSIZE, fileSplit.getLength(1));
-      assertEquals(file3.getName(), fileSplit.getPath(2).getName());
-      assertEquals(2 * BLOCKSIZE, fileSplit.getOffset(2));
-      assertEquals(BLOCKSIZE, fileSplit.getLength(2));
-      assertEquals("host3.rack3.com", fileSplit.getLocations()[0]);
-      fileSplit = (CombineFileSplit) splits.get(1);
-      assertEquals(4, fileSplit.getNumPaths());
-      assertEquals(file2.getName(), fileSplit.getPath(0).getName());
-      assertEquals(0, fileSplit.getOffset(0));
-      assertEquals(BLOCKSIZE, fileSplit.getLength(0));
-      assertEquals(file2.getName(), fileSplit.getPath(1).getName());
-      assertEquals(BLOCKSIZE, fileSplit.getOffset(1));
-      assertEquals(BLOCKSIZE, fileSplit.getLength(1));
-      assertEquals(file4.getName(), fileSplit.getPath(2).getName());
-      assertEquals(BLOCKSIZE, fileSplit.getOffset(2));
-      assertEquals(BLOCKSIZE, fileSplit.getLength(2));
-      assertEquals(file4.getName(), fileSplit.getPath(3).getName());
-      assertEquals( 2 * BLOCKSIZE, fileSplit.getOffset(3));
-      assertEquals(BLOCKSIZE, fileSplit.getLength(3));
-      assertEquals("host2.rack2.com", fileSplit.getLocations()[0]);
-      fileSplit = (CombineFileSplit) splits.get(2);
-      assertEquals(1, fileSplit.getNumPaths());
-      assertEquals(1, fileSplit.getLocations().length);
-      assertEquals(file1.getName(), fileSplit.getPath(0).getName());
-      assertEquals(0, fileSplit.getOffset(0));
-      assertEquals(BLOCKSIZE, fileSplit.getLength(0));
-      assertEquals(hosts1[0], fileSplit.getLocations()[0]); // should be on r1
+
+      actual.clear();
+      reset(mockList);
+      for (InputSplit split : splits) {
+        fileSplit = (CombineFileSplit) split;
+        for (int i = 0; i < fileSplit.getNumPaths(); i++) {
+          String name = fileSplit.getPath(i).getName();
+          long length = fileSplit.getLength(i);
+          long offset = fileSplit.getOffset(i);
+          actual.add(new Split(name, length, offset));
+        }
+        mockList.add(fileSplit.getLocations()[0]);
+      }
+
+      assertEquals(9, actual.size());
+      assertTrue(actual.containsAll(expected));
+      verify(mockList, atLeastOnce()).add(hosts1[0]);
 
       // maximum split size is 7 blocks and min is 3 blocks
       inFormat = new DummyInputFormat();
@@ -619,20 +779,31 @@ public class TestCombineFileInputFormat {
       for (InputSplit split : splits) {
         System.out.println("File split(Test7): " + split);
       }
+
       assertEquals(2, splits.size());
-      fileSplit = (CombineFileSplit) splits.get(0);
-      assertEquals(6, fileSplit.getNumPaths());
-      assertEquals(1, fileSplit.getLocations().length);
-      assertEquals("host3.rack3.com", fileSplit.getLocations()[0]);
-      fileSplit = (CombineFileSplit) splits.get(1);
-      assertEquals(3, fileSplit.getNumPaths());
-      assertEquals(1, fileSplit.getLocations().length);
-      assertEquals("host1.rack1.com", fileSplit.getLocations()[0]);
+
+      actual.clear();
+      reset(mockList);
+      for (InputSplit split : splits) {
+        fileSplit = (CombineFileSplit) split;
+        for (int i = 0; i < fileSplit.getNumPaths(); i++) {
+          String name = fileSplit.getPath(i).getName();
+          long length = fileSplit.getLength(i);
+          long offset = fileSplit.getOffset(i);
+          actual.add(new Split(name, length, offset));
+        }
+        mockList.add(fileSplit.getLocations()[0]);
+      }
+
+      assertEquals(9, actual.size());
+      assertTrue(actual.containsAll(expected));
+      verify(mockList, atLeastOnce()).add(hosts1[0]);
 
       // Rack 1 has file1, file2 and file3 and file4
       // Rack 2 has file2 and file3 and file4
       // Rack 3 has file3 and file4
-      // setup a filter so that only file1 and file2 can be combined
+      // setup a filter so that only (file1 and file2) or (file3 and file4)
+      // can be combined
       inFormat = new DummyInputFormat();
       FileInputFormat.addInputPath(job, inDir);
       inFormat.setMinSplitSizeRack(1); // everything is at least rack local
@@ -642,19 +813,101 @@ public class TestCombineFileInputFormat {
       for (InputSplit split : splits) {
         System.out.println("File split(Test1): " + split);
       }
-      assertEquals(3, splits.size());
-      fileSplit = (CombineFileSplit) splits.get(0);
-      assertEquals(2, fileSplit.getNumPaths());
-      assertEquals(1, fileSplit.getLocations().length);
-      assertEquals(hosts2[0], fileSplit.getLocations()[0]); // should be on r2
-      fileSplit = (CombineFileSplit) splits.get(1);
-      assertEquals(1, fileSplit.getNumPaths());
-      assertEquals(1, fileSplit.getLocations().length);
-      assertEquals(hosts1[0], fileSplit.getLocations()[0]); // should be on r1
-      fileSplit = (CombineFileSplit) splits.get(2);
-      assertEquals(6, fileSplit.getNumPaths());
-      assertEquals(1, fileSplit.getLocations().length);
-      assertEquals(hosts3[0], fileSplit.getLocations()[0]); // should be on r3
+
+      for (InputSplit split : splits) {
+        fileSplit = (CombineFileSplit) split;
+        if (splits.size() == 2) {
+          // first split is on rack1, contains file1 and file2.
+          if (split.equals(splits.get(0))) {
+            assertEquals(3, fileSplit.getNumPaths());
+            expected.clear();
+            expected.add(new Split(file1.getName(), BLOCKSIZE, 0));
+            expected.add(new Split(file2.getName(), BLOCKSIZE, 0));
+            expected.add(new Split(file2.getName(), BLOCKSIZE, BLOCKSIZE));
+            actual.clear();
+            for (int i = 0; i < 3; i++) {
+              String name = fileSplit.getPath(i).getName();
+              long length = fileSplit.getLength(i);
+              long offset = fileSplit.getOffset(i);
+              actual.add(new Split(name, length, offset));
+            }
+            assertTrue(actual.containsAll(expected));
+            assertEquals(1, fileSplit.getLocations().length);
+            assertEquals(hosts1[0], fileSplit.getLocations()[0]);
+          }
+          if (split.equals(splits.get(1))) {
+            // second split contains file3 and file4; however,
+            // its location is undetermined.
+            assertEquals(6, fileSplit.getNumPaths());
+            expected.clear();
+            expected.add(new Split(file3.getName(), BLOCKSIZE, 0));
+            expected.add(new Split(file3.getName(), BLOCKSIZE, BLOCKSIZE));
+            expected.add(new Split(file3.getName(), BLOCKSIZE, BLOCKSIZE * 2));
+            expected.add(new Split(file4.getName(), BLOCKSIZE, 0));
+            expected.add(new Split(file4.getName(), BLOCKSIZE, BLOCKSIZE));
+            expected.add(new Split(file4.getName(), BLOCKSIZE, BLOCKSIZE * 2));
+            actual.clear();
+            for (int i = 0; i < 6; i++) {
+              String name = fileSplit.getPath(i).getName();
+              long length = fileSplit.getLength(i);
+              long offset = fileSplit.getOffset(i);
+              actual.add(new Split(name, length, offset));
+            }
+            assertTrue(actual.containsAll(expected));
+            assertEquals(1, fileSplit.getLocations().length);
+          }
+        } else if (splits.size() == 3) {
+          if (split.equals(splits.get(0))) {
+            // first split is on rack2, contains file2
+            assertEquals(2, fileSplit.getNumPaths());
+            expected.clear();
+            expected.add(new Split(file2.getName(), BLOCKSIZE, 0));
+            expected.add(new Split(file2.getName(), BLOCKSIZE, BLOCKSIZE));
+            actual.clear();
+            for (int i = 0; i < 2; i++) {
+              String name = fileSplit.getPath(i).getName();
+              long length = fileSplit.getLength(i);
+              long offset = fileSplit.getOffset(i);
+              actual.add(new Split(name, length, offset));
+            }
+            assertTrue(actual.containsAll(expected));
+            assertEquals(1, fileSplit.getLocations().length);
+            assertEquals(hosts2[0], fileSplit.getLocations()[0]);
+          }
+          if (split.equals(splits.get(1))) {
+            // second split is on rack1, contains file1
+            assertEquals(1, fileSplit.getNumPaths());
+            assertEquals(file1.getName(), fileSplit.getPath(0).getName());
+            assertEquals(BLOCKSIZE, fileSplit.getLength(0));
+            assertEquals(0, fileSplit.getOffset(0));
+            assertEquals(1, fileSplit.getLocations().length);
+            assertEquals(hosts1[0], fileSplit.getLocations()[0]);
+          }
+          if (split.equals(splits.get(2))) {
+            // third split contains file3 and file4; however,
+            // its location is undetermined.
+            assertEquals(6, fileSplit.getNumPaths());
+            expected.clear();
+            expected.add(new Split(file3.getName(), BLOCKSIZE, 0));
+            expected.add(new Split(file3.getName(), BLOCKSIZE, BLOCKSIZE));
+            expected.add(new Split(file3.getName(), BLOCKSIZE, BLOCKSIZE * 2));
+            expected.add(new Split(file4.getName(), BLOCKSIZE, 0));
+            expected.add(new Split(file4.getName(), BLOCKSIZE, BLOCKSIZE));
+            expected.add(new Split(file4.getName(), BLOCKSIZE, BLOCKSIZE * 2));
+            actual.clear();
+            for (int i = 0; i < 6; i++) {
+              String name = fileSplit.getPath(i).getName();
+              long length = fileSplit.getLength(i);
+              long offset = fileSplit.getOffset(i);
+              actual.add(new Split(name, length, offset));
+            }
+            assertTrue(actual.containsAll(expected));
+            assertEquals(1, fileSplit.getLocations().length);
+          }
+        } else {
+          fail("Split size should be 2 or 3.");
+        }
+      }
 
       // measure performance when there are multiple pools and
       // many files in each pool.
@@ -844,7 +1097,14 @@ public class TestCombineFileInputFormat {
     assertEquals(3, nodeSplits.count(locations[1]));
   }
 
+  /**
+   * The test suppresses unchecked warnings in
+   * {@link org.mockito.Mockito#reset}. Although calling that method is
+   * considered bad practice, we call it instead of splitting the test
+   * (i.e. restarting MiniDFSCluster) to save time.
+   */
   @Test
+  @SuppressWarnings("unchecked")
   public void testSplitPlacementForCompressedFiles() throws Exception {
     MiniDFSCluster dfs = null;
     FileSystem fileSys = null;
@@ -915,21 +1175,55 @@ public class TestCombineFileInputFormat {
       for (InputSplit split : splits) {
         System.out.println("File split(Test1): " + split);
       }
-      assertEquals(2, splits.size());
-      fileSplit = (CombineFileSplit) splits.get(0);
-      assertEquals(1, fileSplit.getNumPaths());
-      assertEquals(1, fileSplit.getLocations().length);
-      assertEquals(file2.getName(), fileSplit.getPath(0).getName());
-      assertEquals(0, fileSplit.getOffset(0));
-      assertEquals(f2.getLen(), fileSplit.getLength(0));
-      assertEquals(hosts2[0], fileSplit.getLocations()[0]); // should be on r2
-      fileSplit = (CombineFileSplit) splits.get(1);
-      assertEquals(1, fileSplit.getNumPaths());
-      assertEquals(1, fileSplit.getLocations().length);
-      assertEquals(file1.getName(), fileSplit.getPath(0).getName());
-      assertEquals(0, fileSplit.getOffset(0));
-      assertEquals(f1.getLen(), fileSplit.getLength(0));
-      assertEquals(hosts1[0], fileSplit.getLocations()[0]); // should be on r1
+
+      Set<Split> expected = new HashSet<>();
+      expected.add(new Split(file1.getName(), f1.getLen(), 0));
+      expected.add(new Split(file2.getName(), f2.getLen(), 0));
+      List<Split> actual = new ArrayList<>();
+
+      /**
+       * If rack1 is processed first by
+       * {@link CombineFileInputFormat#createSplits},
+       * create only one split on rack1. Otherwise create two splits.
+       */
+      for (InputSplit split : splits) {
+        fileSplit = (CombineFileSplit) split;
+        if (splits.size() == 2) {
+          if (split.equals(splits.get(0))) {
+            // first split is on rack2, contains file2.
+            assertEquals(1, fileSplit.getNumPaths());
+            assertEquals(1, fileSplit.getLocations().length);
+            assertEquals(file2.getName(), fileSplit.getPath(0).getName());
+            assertEquals(0, fileSplit.getOffset(0));
+            assertEquals(f2.getLen(), fileSplit.getLength(0));
+            assertEquals(hosts2[0], fileSplit.getLocations()[0]);
+          }
+          if (split.equals(splits.get(1))) {
+            // second split is on rack1, contains file1.
+            assertEquals(1, fileSplit.getNumPaths());
+            assertEquals(1, fileSplit.getLocations().length);
+            assertEquals(file1.getName(), fileSplit.getPath(0).getName());
+            assertEquals(0, fileSplit.getOffset(0));
+            assertEquals(f1.getLen(), fileSplit.getLength(0));
+            assertEquals(hosts1[0], fileSplit.getLocations()[0]);
+          }
+        } else if (splits.size() == 1) {
+          // first split is on rack1, contains file1 and file2.
+          assertEquals(2, fileSplit.getNumPaths());
+          assertEquals(1, fileSplit.getLocations().length);
+          assertEquals(hosts1[0], fileSplit.getLocations()[0]);
+        } else {
+          fail("Split size should be 1 or 2.");
+        }
+        for (int i = 0; i < fileSplit.getNumPaths(); i++) {
+          String name = fileSplit.getPath(i).getName();
+          long length = fileSplit.getLength(i);
+          long offset = fileSplit.getOffset(i);
+          actual.add(new Split(name, length, offset));
+        }
+      }
+      assertEquals(2, actual.size());
+      assertTrue(actual.containsAll(expected));
 
       // create another file on 3 datanodes and 3 racks.
       dfs.startDataNodes(conf, 1, true, null, rack3, hosts3, null);
@@ -943,28 +1237,83 @@ public class TestCombineFileInputFormat {
       for (InputSplit split : splits) {
         System.out.println("File split(Test2): " + split);
       }
-      assertEquals(3, splits.size());
-      fileSplit = (CombineFileSplit) splits.get(0);
-      assertEquals(1, fileSplit.getNumPaths());
-      assertEquals(1, fileSplit.getLocations().length);
-      assertEquals(file3.getName(), fileSplit.getPath(0).getName());
-      assertEquals(0, fileSplit.getOffset(0));
-      assertEquals(f3.getLen(), fileSplit.getLength(0));
-      assertEquals(hosts3[0], fileSplit.getLocations()[0]); // should be on r3
-      fileSplit = (CombineFileSplit) splits.get(1);
-      assertEquals(1, fileSplit.getNumPaths());
-      assertEquals(1, fileSplit.getLocations().length);
-      assertEquals(file2.getName(), fileSplit.getPath(0).getName());
-      assertEquals(0, fileSplit.getOffset(0));
-      assertEquals(f2.getLen(), fileSplit.getLength(0));
-      assertEquals(hosts2[0], fileSplit.getLocations()[0]); // should be on r2
-      fileSplit = (CombineFileSplit) splits.get(2);
-      assertEquals(1, fileSplit.getNumPaths());
-      assertEquals(1, fileSplit.getLocations().length);
-      assertEquals(file1.getName(), fileSplit.getPath(0).getName());
-      assertEquals(0, fileSplit.getOffset(0));
-      assertEquals(f1.getLen(), fileSplit.getLength(0));
-      assertEquals(hosts1[0], fileSplit.getLocations()[0]); // should be on r1
+
+      expected.add(new Split(file3.getName(), f3.getLen(), 0));
+      actual.clear();
+
+      for (InputSplit split : splits) {
+        fileSplit = (CombineFileSplit) split;
+        /**
+         * If rack1 is processed first by
+         * {@link CombineFileInputFormat#createSplits},
+         * create only one split on rack1.
+         * If rack2 or rack3 is processed first and rack1 is processed second,
+         * create one split on rack2 or rack3 and the other split is on rack1.
+         * Otherwise create 3 splits for each rack.
+         */
+        if (splits.size() == 3) {
+          // first split is on rack3, contains file3
+          if (split.equals(splits.get(0))) {
+            assertEquals(1, fileSplit.getNumPaths());
+            assertEquals(file3.getName(), fileSplit.getPath(0).getName());
+            assertEquals(f3.getLen(), fileSplit.getLength(0));
+            assertEquals(0, fileSplit.getOffset(0));
+            assertEquals(1, fileSplit.getLocations().length);
+            assertEquals(hosts3[0], fileSplit.getLocations()[0]);
+          }
+          // second split is on rack2, contains file2
+          if (split.equals(splits.get(1))) {
+            assertEquals(1, fileSplit.getNumPaths());
+            assertEquals(file2.getName(), fileSplit.getPath(0).getName());
+            assertEquals(f2.getLen(), fileSplit.getLength(0));
+            assertEquals(0, fileSplit.getOffset(0));
+            assertEquals(1, fileSplit.getLocations().length);
+            assertEquals(hosts2[0], fileSplit.getLocations()[0]);
+          }
+          // third split is on rack1, contains file1
+          if (split.equals(splits.get(2))) {
+            assertEquals(1, fileSplit.getNumPaths());
+            assertEquals(file1.getName(), fileSplit.getPath(0).getName());
+            assertEquals(f1.getLen(), fileSplit.getLength(0));
+            assertEquals(0, fileSplit.getOffset(0));
+            assertEquals(1, fileSplit.getLocations().length);
+            assertEquals(hosts1[0], fileSplit.getLocations()[0]);
+          }
+        } else if (splits.size() == 2) {
+          // first split is on rack2 or rack3, contains one or two files.
+          if (split.equals(splits.get(0))) {
+            assertEquals(1, fileSplit.getLocations().length);
+            if (fileSplit.getLocations()[0].equals(hosts2[0])) {
+              assertEquals(2, fileSplit.getNumPaths());
+            } else if (fileSplit.getLocations()[0].equals(hosts3[0])) {
+              assertEquals(1, fileSplit.getNumPaths());
+            } else {
+              fail("First split should be on rack2 or rack3.");
+            }
+          }
+          // second split is on rack1, contains the remaining files.
+          if (split.equals(splits.get(1))) {
+            assertEquals(1, fileSplit.getLocations().length);
+            assertEquals(hosts1[0], fileSplit.getLocations()[0]);
+          }
+        } else if (splits.size() == 1) {
+          // first split is on rack1, contains all three files.
+          assertEquals(1, fileSplit.getLocations().length);
+          assertEquals(3, fileSplit.getNumPaths());
+          assertEquals(hosts1[0], fileSplit.getLocations()[0]);
+        } else {
+          fail("Split size should be 1, 2, or 3.");
+        }
+        for (int i = 0; i < fileSplit.getNumPaths(); i++) {
+          String name = fileSplit.getPath(i).getName();
+          long length = fileSplit.getLength(i);
+          long offset = fileSplit.getOffset(i);
+          actual.add(new Split(name, length, offset));
+        }
+      }
+
+      assertEquals(3, actual.size());
+      assertTrue(actual.containsAll(expected));
 
       // create file4 on all three racks
       Path file4 = new Path(dir4 + "/file4.gz");
@@ -977,31 +1326,79 @@ public class TestCombineFileInputFormat {
       for (InputSplit split : splits) {
         System.out.println("File split(Test3): " + split);
       }
-      assertEquals(3, splits.size());
-      fileSplit = (CombineFileSplit) splits.get(0);
-      assertEquals(2, fileSplit.getNumPaths());
-      assertEquals(1, fileSplit.getLocations().length);
-      assertEquals(file3.getName(), fileSplit.getPath(0).getName());
-      assertEquals(0, fileSplit.getOffset(0));
-      assertEquals(f3.getLen(), fileSplit.getLength(0));
-      assertEquals(file4.getName(), fileSplit.getPath(1).getName());
-      assertEquals(0, fileSplit.getOffset(1));
-      assertEquals(f4.getLen(), fileSplit.getLength(1));
-      assertEquals(hosts3[0], fileSplit.getLocations()[0]); // should be on r3
-      fileSplit = (CombineFileSplit) splits.get(1);
-      assertEquals(1, fileSplit.getNumPaths());
-      assertEquals(1, fileSplit.getLocations().length);
-      assertEquals(file2.getName(), fileSplit.getPath(0).getName());
-      assertEquals(0, fileSplit.getOffset(0));
-      assertEquals(f2.getLen(), fileSplit.getLength(0));
-      assertEquals(hosts2[0], fileSplit.getLocations()[0]); // should be on r2
-      fileSplit = (CombineFileSplit) splits.get(2);
-      assertEquals(1, fileSplit.getNumPaths());
-      assertEquals(1, fileSplit.getLocations().length);
-      assertEquals(file1.getName(), fileSplit.getPath(0).getName());
-      assertEquals(0, fileSplit.getOffset(0));
-      assertEquals(f1.getLen(), fileSplit.getLength(0));
-      assertEquals(hosts1[0], fileSplit.getLocations()[0]); // should be on r1
+
+      expected.add(new Split(file3.getName(), f3.getLen(), 0));
+      actual.clear();
+
+      for (InputSplit split : splits) {
+        fileSplit = (CombineFileSplit) split;
+        /**
+         * If rack1 is processed first by
+         * {@link CombineFileInputFormat#createSplits},
+         * create only one split on rack1.
+         * If rack2 or rack3 is processed first and rack1 is processed second,
+         * create one split on rack2 or rack3 and the other split is on rack1.
+         * Otherwise create 3 splits for each rack.
+         */
+        if (splits.size() == 3) {
+          // first split is on rack3, contains file3 and file4
+          if (split.equals(splits.get(0))) {
+            assertEquals(2, fileSplit.getNumPaths());
+            assertEquals(hosts3[0], fileSplit.getLocations()[0]);
+          }
+          // second split is on rack2, contains file2
+          if (split.equals(splits.get(1))) {
+            assertEquals(1, fileSplit.getNumPaths());
+            assertEquals(file2.getName(), fileSplit.getPath(0).getName());
+            assertEquals(f2.getLen(), fileSplit.getLength(0));
+            assertEquals(0, fileSplit.getOffset(0));
+            assertEquals(1, fileSplit.getLocations().length);
+            assertEquals(hosts2[0], fileSplit.getLocations()[0]);
+          }
+          // third split is on rack1, contains file1
+          if (split.equals(splits.get(2))) {
+            assertEquals(1, fileSplit.getNumPaths());
+            assertEquals(file1.getName(), fileSplit.getPath(0).getName());
+            assertEquals(f1.getLen(), fileSplit.getLength(0));
+            assertEquals(0, fileSplit.getOffset(0));
+            assertEquals(1, fileSplit.getLocations().length);
+            assertEquals(hosts1[0], fileSplit.getLocations()[0]);
+          }
+        } else if (splits.size() == 2) {
+          // first split is on rack2 or rack3, contains two or three files.
+          if (split.equals(splits.get(0))) {
+            assertEquals(1, fileSplit.getLocations().length);
+            if (fileSplit.getLocations()[0].equals(hosts2[0])) {
+              assertEquals(3, fileSplit.getNumPaths());
+            } else if (fileSplit.getLocations()[0].equals(hosts3[0])) {
+              assertEquals(2, fileSplit.getNumPaths());
+            } else {
+              fail("First split should be on rack2 or rack3.");
+            }
+          }
+          // second split is on rack1, contains the remaining files.
+          if (split.equals(splits.get(1))) {
+            assertEquals(1, fileSplit.getLocations().length);
+            assertEquals(hosts1[0], fileSplit.getLocations()[0]);
+          }
+        } else if (splits.size() == 1) {
+          // first split is on rack1, contains all four files.
+          assertEquals(1, fileSplit.getLocations().length);
+          assertEquals(4, fileSplit.getNumPaths());
+          assertEquals(hosts1[0], fileSplit.getLocations()[0]);
+        } else {
+          fail("Split size should be 1, 2, or 3.");
+        }
+        for (int i = 0; i < fileSplit.getNumPaths(); i++) {
+          String name = fileSplit.getPath(i).getName();
+          long length = fileSplit.getLength(i);
+          long offset = fileSplit.getOffset(i);
+          actual.add(new Split(name, length, offset));
+        }
+      }
+
+      assertEquals(4, actual.size());
+      assertTrue(actual.containsAll(expected));
 
       // maximum split size is file1's length
       inFormat = new DummyInputFormat();
@@ -1014,32 +1411,24 @@ public class TestCombineFileInputFormat {
         System.out.println("File split(Test4): " + split);
       }
       assertEquals(4, splits.size());
-      fileSplit = (CombineFileSplit) splits.get(0);
-      assertEquals(1, fileSplit.getNumPaths());
-      assertEquals(1, fileSplit.getLocations().length);
-      assertEquals(file3.getName(), fileSplit.getPath(0).getName());
-      assertEquals(0, fileSplit.getOffset(0));
-      assertEquals(f3.getLen(), fileSplit.getLength(0));
-      assertEquals(hosts3[0], fileSplit.getLocations()[0]); // should be on r3
-      fileSplit = (CombineFileSplit) splits.get(1);
-      assertEquals(file2.getName(), fileSplit.getPath(0).getName());
-      assertEquals(0, fileSplit.getOffset(0));
-      assertEquals(f2.getLen(), fileSplit.getLength(0));
-      assertEquals(hosts2[0], fileSplit.getLocations()[0]); // should be on r3
-      fileSplit = (CombineFileSplit) splits.get(2);
-      assertEquals(1, fileSplit.getNumPaths());
-      assertEquals(1, fileSplit.getLocations().length);
-      assertEquals(file1.getName(), fileSplit.getPath(0).getName());
-      assertEquals(0, fileSplit.getOffset(0));
-      assertEquals(f1.getLen(), fileSplit.getLength(0));
-      assertEquals(hosts1[0], fileSplit.getLocations()[0]); // should be on r2
-      fileSplit = (CombineFileSplit) splits.get(3);
-      assertEquals(1, fileSplit.getNumPaths());
-      assertEquals(1, fileSplit.getLocations().length);
-      assertEquals(file4.getName(), fileSplit.getPath(0).getName());
-      assertEquals(0, fileSplit.getOffset(0));
-      assertEquals(f4.getLen(), fileSplit.getLength(0));
-      assertEquals(hosts3[0], fileSplit.getLocations()[0]); // should be on r1
+
+      actual.clear();
+      for (InputSplit split : splits) {
+        fileSplit = (CombineFileSplit) split;
+        for (int i = 0; i < fileSplit.getNumPaths(); i++) {
+          String name = fileSplit.getPath(i).getName();
+          long length = fileSplit.getLength(i);
+          long offset = fileSplit.getOffset(i);
+          actual.add(new Split(name, length, offset));
+        }
+        mockList.add(fileSplit.getLocations()[0]);
+      }
+
+      assertEquals(4, actual.size());
+      assertTrue(actual.containsAll(expected));
+      verify(mockList, atLeastOnce()).add(hosts1[0]);
+      verify(mockList, atLeastOnce()).add(hosts2[0]);
+      verify(mockList, atLeastOnce()).add(hosts3[0]);
 
       // maximum split size is twice file1's length
       inFormat = new DummyInputFormat();
@@ -1051,31 +1440,33 @@ public class TestCombineFileInputFormat {
       for (InputSplit split : splits) {
         System.out.println("File split(Test5): " + split);
       }
-      assertEquals(3, splits.size());
-      fileSplit = (CombineFileSplit) splits.get(0);
-      assertEquals(2, fileSplit.getNumPaths());
-      assertEquals(1, fileSplit.getLocations().length);
-      assertEquals(file3.getName(), fileSplit.getPath(0).getName());
-      assertEquals(0, fileSplit.getOffset(0));
-      assertEquals(f3.getLen(), fileSplit.getLength(0));
-      assertEquals(file4.getName(), fileSplit.getPath(1).getName());
-      assertEquals(0, fileSplit.getOffset(1));
-      assertEquals(f4.getLen(), fileSplit.getLength(1));
-      assertEquals(hosts3[0], fileSplit.getLocations()[0]);
-      fileSplit = (CombineFileSplit) splits.get(1);
-      assertEquals(1, fileSplit.getNumPaths());
-      assertEquals(1, fileSplit.getLocations().length);
-      assertEquals(file2.getName(), fileSplit.getPath(0).getName());
-      assertEquals(0, fileSplit.getOffset(0));
-      assertEquals(f2.getLen(), fileSplit.getLength(0));
-      assertEquals(hosts2[0], fileSplit.getLocations()[0]); // should be on r2
-      fileSplit = (CombineFileSplit) splits.get(2);
-      assertEquals(1, fileSplit.getNumPaths());
-      assertEquals(1, fileSplit.getLocations().length);
-      assertEquals(file1.getName(), fileSplit.getPath(0).getName());
-      assertEquals(0, fileSplit.getOffset(0));
-      assertEquals(f1.getLen(), fileSplit.getLength(0));
-      assertEquals(hosts1[0], fileSplit.getLocations()[0]); // should be on r1
+
+      actual.clear();
+      reset(mockList);
+      for (InputSplit split : splits) {
+        fileSplit = (CombineFileSplit) split;
+        for (int i = 0; i < fileSplit.getNumPaths(); i++) {
+          String name = fileSplit.getPath(i).getName();
+          long length = fileSplit.getLength(i);
+          long offset = fileSplit.getOffset(i);
+          actual.add(new Split(name, length, offset));
+        }
+        mockList.add(fileSplit.getLocations()[0]);
+      }
+      assertEquals(4, actual.size());
+      assertTrue(actual.containsAll(expected));
+
+      if (splits.size() == 3) {
+        // splits are on all the racks
+        verify(mockList, times(1)).add(hosts1[0]);
+        verify(mockList, times(1)).add(hosts2[0]);
+        verify(mockList, times(1)).add(hosts3[0]);
+      } else if (splits.size() == 2) {
+        // one split is on rack1, another split is on rack2 or rack3
+        verify(mockList, times(1)).add(hosts1[0]);
+      } else {
+        fail("Split size should be 2 or 3.");
+      }
 
       // maximum split size is 4 times file1's length 
       inFormat = new DummyInputFormat();
@@ -1087,26 +1478,29 @@ public class TestCombineFileInputFormat {
       for (InputSplit split : splits) {
         System.out.println("File split(Test6): " + split);
       }
-      assertEquals(2, splits.size());
-      fileSplit = (CombineFileSplit) splits.get(0);
-      assertEquals(2, fileSplit.getNumPaths());
-      assertEquals(1, fileSplit.getLocations().length);
-      assertEquals(file3.getName(), fileSplit.getPath(0).getName());
-      assertEquals(0, fileSplit.getOffset(0));
-      assertEquals(f3.getLen(), fileSplit.getLength(0));
-      assertEquals(file4.getName(), fileSplit.getPath(1).getName());
-      assertEquals(0, fileSplit.getOffset(1));
-      assertEquals(f4.getLen(), fileSplit.getLength(1));
-      assertEquals(hosts3[0], fileSplit.getLocations()[0]);
-      fileSplit = (CombineFileSplit) splits.get(1);
-      assertEquals(2, fileSplit.getNumPaths());
-      assertEquals(file1.getName(), fileSplit.getPath(0).getName());
-      assertEquals(0, fileSplit.getOffset(0));
-      assertEquals(f1.getLen(), fileSplit.getLength(0));
-      assertEquals(file2.getName(), fileSplit.getPath(1).getName());
-      assertEquals(0, fileSplit.getOffset(1), BLOCKSIZE);
-      assertEquals(f2.getLen(), fileSplit.getLength(1));
-      assertEquals(hosts1[0], fileSplit.getLocations()[0]); // should be on r1
+
+      /**
+       * If rack1 is processed first by
+       * {@link CombineFileInputFormat#createSplits}, only one split
+       * (on rack1) is created. Otherwise two splits are created.
+       */
+      assertTrue("Split size should be 1 or 2.",
+          splits.size() == 1 || splits.size() == 2);
+      actual.clear();
+      reset(mockList);
+      for (InputSplit split : splits) {
+        fileSplit = (CombineFileSplit) split;
+        for (int i = 0; i < fileSplit.getNumPaths(); i++) {
+          String name = fileSplit.getPath(i).getName();
+          long length = fileSplit.getLength(i);
+          long offset = fileSplit.getOffset(i);
+          actual.add(new Split(name, length, offset));
+        }
+        mockList.add(fileSplit.getLocations()[0]);
+      }
+      assertEquals(4, actual.size());
+      assertTrue(actual.containsAll(expected));
+      verify(mockList, times(1)).add(hosts1[0]);
 
       // maximum split size and min-split-size per rack is 4 times file1's length
       inFormat = new DummyInputFormat();
@@ -1146,25 +1540,57 @@ public class TestCombineFileInputFormat {
       inFormat = new DummyInputFormat();
       FileInputFormat.addInputPath(job, inDir);
       inFormat.setMinSplitSizeRack(1); // everything is at least rack local
-      inFormat.createPool(new TestFilter(dir1), 
-                          new TestFilter(dir2));
+      inFormat.createPool(new TestFilter(dir1),
+          new TestFilter(dir2));
       splits = inFormat.getSplits(job);
       for (InputSplit split : splits) {
         System.out.println("File split(Test9): " + split);
       }
-      assertEquals(3, splits.size());
-      fileSplit = (CombineFileSplit) splits.get(0);
-      assertEquals(1, fileSplit.getNumPaths());
-      assertEquals(1, fileSplit.getLocations().length);
-      assertEquals(hosts2[0], fileSplit.getLocations()[0]); // should be on r2
-      fileSplit = (CombineFileSplit) splits.get(1);
-      assertEquals(1, fileSplit.getNumPaths());
-      assertEquals(1, fileSplit.getLocations().length);
-      assertEquals(hosts1[0], fileSplit.getLocations()[0]); // should be on r1
-      fileSplit = (CombineFileSplit) splits.get(2);
-      assertEquals(2, fileSplit.getNumPaths());
-      assertEquals(1, fileSplit.getLocations().length);
-      assertEquals(hosts3[0], fileSplit.getLocations()[0]); // should be on r3
+
+      actual.clear();
+      for (InputSplit split : splits) {
+        fileSplit = (CombineFileSplit) split;
+        if (splits.size() == 3) {
+          // If rack2 is processed first
+          if (split.equals(splits.get(0))) {
+            assertEquals(1, fileSplit.getNumPaths());
+            assertEquals(1, fileSplit.getLocations().length);
+            assertEquals(hosts2[0], fileSplit.getLocations()[0]);
+          }
+          if (split.equals(splits.get(1))) {
+            assertEquals(1, fileSplit.getNumPaths());
+            assertEquals(1, fileSplit.getLocations().length);
+            assertEquals(hosts1[0], fileSplit.getLocations()[0]);
+          }
+          if (split.equals(splits.get(2))) {
+            assertEquals(2, fileSplit.getNumPaths());
+            assertEquals(1, fileSplit.getLocations().length);
+            assertEquals(hosts3[0], fileSplit.getLocations()[0]);
+          }
+        } else if (splits.size() == 2) {
+          // If rack1 is processed first
+          if (split.equals(splits.get(0))) {
+            assertEquals(2, fileSplit.getNumPaths());
+            assertEquals(1, fileSplit.getLocations().length);
+            assertEquals(hosts1[0], fileSplit.getLocations()[0]);
+          }
+          if (split.equals(splits.get(1))) {
+            assertEquals(2, fileSplit.getNumPaths());
+            assertEquals(1, fileSplit.getLocations().length);
+            assertEquals(hosts3[0], fileSplit.getLocations()[0]);
+          }
+        } else {
+          fail("Split size should be 2 or 3.");
+        }
+        for (int i = 0; i < fileSplit.getNumPaths(); i++) {
+          String name = fileSplit.getPath(i).getName();
+          long length = fileSplit.getLength(i);
+          long offset = fileSplit.getOffset(i);
+          actual.add(new Split(name, length, offset));
+        }
+      }
+      assertEquals(4, actual.size());
+      assertTrue(actual.containsAll(expected));
 
       // measure performance when there are multiple pools and
       // many files in each pool.


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org


[2/4] hadoop git commit: Update CHANGES.txt for HADOOP-14867, HADOOP-14919, HDFS-12578, HDFS-12596, HDFS-8829, MAPREDUCE-6937, MAPREDUCE-6975, YARN-6959, YARN-7084

Posted by sh...@apache.org.
Update CHANGES.txt for HADOOP-14867, HADOOP-14919, HDFS-12578, HDFS-12596, HDFS-8829, MAPREDUCE-6937, MAPREDUCE-6975, YARN-6959, YARN-7084

Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/261f8abc
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/261f8abc
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/261f8abc

Branch: refs/heads/branch-2.7
Commit: 261f8abcebb90f143a73b5efd6fb802e4f0cd618
Parents: fbb44d7
Author: Konstantin V Shvachko <sh...@apache.org>
Authored: Thu Dec 7 15:04:59 2017 -0800
Committer: Konstantin V Shvachko <sh...@apache.org>
Committed: Thu Dec 7 15:04:59 2017 -0800

----------------------------------------------------------------------
 hadoop-common-project/hadoop-common/CHANGES.txt |  6 ++++++
 hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt     | 18 ++++++++++++------
 hadoop-mapreduce-project/CHANGES.txt            |  5 +++++
 hadoop-yarn-project/CHANGES.txt                 |  6 ++++++
 4 files changed, 29 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/261f8abc/hadoop-common-project/hadoop-common/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt
index 2f5c3fa..71617a7 100644
--- a/hadoop-common-project/hadoop-common/CHANGES.txt
+++ b/hadoop-common-project/hadoop-common/CHANGES.txt
@@ -25,6 +25,10 @@ Release 2.7.5 - UNRELEASED
     HADOOP-14827. Allow StopWatch to accept a Timer parameter for tests.
     (Erik Krogen via jlowe)
 
+    HADOOP-14867. Update HDFS Federation setup document, for incorrect
+    property name for secondary name node http address.
+    (Bharat Viswanadham via Arpit Agarwal)
+
   OPTIMIZATIONS
 
   BUG FIXES
@@ -38,6 +42,8 @@ Release 2.7.5 - UNRELEASED
     HADOOP-14902. LoadGenerator#genFile write close timing is incorrectly
     calculated. (Hanisha Koneru via jlowe)
 
+    HADOOP-14919. BZip2 drops records when reading data in splits. (Jason Lowe)
+
 Release 2.7.4 - 2017-08-04
 
   INCOMPATIBLE CHANGES

http://git-wip-us.apache.org/repos/asf/hadoop/blob/261f8abc/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index 8bebd37..180fc96 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -33,8 +33,11 @@ Release 2.7.5 - UNRELEASED
     HDFS-10984. Expose nntop output as metrics. (Siddharth Wagle via xyao, zhz)
 
     HDFS-9259. Make SO_SNDBUF size configurable at DFSClient side for hdfs write
-    scenario. (original patch Mingliang Liu via Ming Ma, branch-2.7 backport done
-    under HDFS-12823, Erik Krogen via zhz).
+    scenario. (original patch Mingliang Liu via Ming Ma, branch-2.7 backport
+    done under HDFS-12823, Erik Krogen via zhz).
+
+    HDFS-12596. Add TestFsck#testFsckCorruptWhenOneReplicaIsCorrupt
+    back to branch-2.7. (Xiao Chen)
 
   OPTIMIZATIONS
 
@@ -44,6 +47,10 @@ Release 2.7.5 - UNRELEASED
     HDFS-12323. NameNode terminates after full GC thinking QJM unresponsive
     if full GC is much longer than timeout. (Erik Krogen via shv)
 
+    HDFS-8829. Make SO_RCVBUF and SO_SNDBUF size configurable for
+    DataTransferProtocol sockets and allow configuring auto-tuning (He Tianyi
+    via Colin P. McCabe)
+
   BUG FIXES
 
     HDFS-12157. Do fsyncDirectory(..) outside of FSDataset lock.
@@ -60,6 +67,9 @@ Release 2.7.5 - UNRELEASED
 
     HDFS-9003. ForkJoin thread pool leaks. (Kihwal Lee via jing9)
 
+    HDFS-12578. TestDeadDatanode#testNonDFSUsedONDeadNodeReReg failing
+    in branch-2.7. (Ajay Kumar via Xiao Chen)
+
     HDFS-12832. INode.getFullPathName may throw ArrayIndexOutOfBoundsException
     lead to NameNode exit. (shv)
 
@@ -539,10 +549,6 @@ Release 2.7.3 - 2016-08-25
     HDFS-8845. DiskChecker should not traverse the entire tree (Chang Li via
     Colin P. McCabe)
 
-    HDFS-8829. Make SO_RCVBUF and SO_SNDBUF size configurable for
-    DataTransferProtocol sockets and allow configuring auto-tuning (He Tianyi
-    via Colin P. McCabe)
-
   BUG FIXES
 
     HDFS-9289. Make DataStreamer#block thread safe and verify genStamp in

http://git-wip-us.apache.org/repos/asf/hadoop/blob/261f8abc/hadoop-mapreduce-project/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt
index 9d84e6d..484b927 100644
--- a/hadoop-mapreduce-project/CHANGES.txt
+++ b/hadoop-mapreduce-project/CHANGES.txt
@@ -20,6 +20,11 @@ Release 2.7.5 - UNRELEASED
 
   IMPROVEMENTS
 
+    MAPREDUCE-6937. Backport MAPREDUCE-6870 to branch-2 while preserving
+    compatibility. (Peter Bacsko via Haibo Chen)
+
+    MAPREDUCE-6975. Logging task counters. (Prabhu Joseph via Naganarasimha)
+
   OPTIMIZATIONS
 
   BUG FIXES

http://git-wip-us.apache.org/repos/asf/hadoop/blob/261f8abc/hadoop-yarn-project/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt
index 83b3ec1..2ebac88 100644
--- a/hadoop-yarn-project/CHANGES.txt
+++ b/hadoop-yarn-project/CHANGES.txt
@@ -28,6 +28,12 @@ Release 2.7.5 - UNRELEASED
     APP_ATTEMPT_REMOVED event when async-scheduling enabled in
     CapacityScheduler. (sandflee and Jonathan Hung via jlowe)
 
+    YARN-6959. RM may allocate wrong AM Container for new attempt.
+    (Yuqi Wang via Jian He)
+
+    YARN-7084. TestSchedulingMonitor#testRMStarts fails sporadically.
+    (Jason Lowe)
+
 Release 2.7.4 - 2017-08-04
 
   INCOMPATIBLE CHANGES


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org


[3/4] hadoop git commit: Update CHANGES.txt for HADOOP-14867, HADOOP-14919, HDFS-12578, HDFS-12596, HDFS-8829, MAPREDUCE-6937, MAPREDUCE-6975, YARN-6959, YARN-7084

Posted by sh...@apache.org.
Update CHANGES.txt for HADOOP-14867, HADOOP-14919, HDFS-12578, HDFS-12596, HDFS-8829, MAPREDUCE-6937, MAPREDUCE-6975, YARN-6959, YARN-7084


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/110c5140
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/110c5140
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/110c5140

Branch: refs/heads/branch-2.7.5
Commit: 110c514013fdaeafbc3049b494382c46109ce6e8
Parents: 922728d
Author: Konstantin V Shvachko <sh...@apache.org>
Authored: Thu Dec 7 15:04:59 2017 -0800
Committer: Konstantin V Shvachko <sh...@apache.org>
Committed: Thu Dec 7 15:06:57 2017 -0800

----------------------------------------------------------------------
 hadoop-common-project/hadoop-common/CHANGES.txt |  6 ++++++
 hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt     | 18 ++++++++++++------
 hadoop-mapreduce-project/CHANGES.txt            |  5 +++++
 hadoop-yarn-project/CHANGES.txt                 |  6 ++++++
 4 files changed, 29 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/110c5140/hadoop-common-project/hadoop-common/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt
index 5b18940..4ed85aa 100644
--- a/hadoop-common-project/hadoop-common/CHANGES.txt
+++ b/hadoop-common-project/hadoop-common/CHANGES.txt
@@ -13,6 +13,10 @@ Release 2.7.5 - 2017-12-01
     HADOOP-14827. Allow StopWatch to accept a Timer parameter for tests.
     (Erik Krogen via jlowe)
 
+    HADOOP-14867. Update HDFS Federation setup document, for incorrect
+    property name for secondary name node http address.
+    (Bharat Viswanadham via Arpit Agarwal)
+
   OPTIMIZATIONS
 
   BUG FIXES
@@ -26,6 +30,8 @@ Release 2.7.5 - 2017-12-01
     HADOOP-14902. LoadGenerator#genFile write close timing is incorrectly
     calculated. (Hanisha Koneru via jlowe)
 
+    HADOOP-14919. BZip2 drops records when reading data in splits. (Jason Lowe)
+
 Release 2.7.4 - 2017-08-04
 
   INCOMPATIBLE CHANGES

http://git-wip-us.apache.org/repos/asf/hadoop/blob/110c5140/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index ffc11fd..5554477 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -21,8 +21,11 @@ Release 2.7.5 - 2017-12-01
     HDFS-10984. Expose nntop output as metrics. (Siddharth Wagle via xyao, zhz)
 
     HDFS-9259. Make SO_SNDBUF size configurable at DFSClient side for hdfs write
-    scenario. (original patch Mingliang Liu via Ming Ma, branch-2.7 backport done
-    under HDFS-12823, Erik Krogen via zhz).
+    scenario. (original patch Mingliang Liu via Ming Ma, branch-2.7 backport
+    done under HDFS-12823, Erik Krogen via zhz).
+
+    HDFS-12596. Add TestFsck#testFsckCorruptWhenOneReplicaIsCorrupt
+    back to branch-2.7. (Xiao Chen)
 
   OPTIMIZATIONS
 
@@ -32,6 +35,10 @@ Release 2.7.5 - 2017-12-01
     HDFS-12323. NameNode terminates after full GC thinking QJM unresponsive
     if full GC is much longer than timeout. (Erik Krogen via shv)
 
+    HDFS-8829. Make SO_RCVBUF and SO_SNDBUF size configurable for
+    DataTransferProtocol sockets and allow configuring auto-tuning (He Tianyi
+    via Colin P. McCabe)
+
   BUG FIXES
 
     HDFS-12157. Do fsyncDirectory(..) outside of FSDataset lock.
@@ -48,6 +55,9 @@ Release 2.7.5 - 2017-12-01
 
     HDFS-9003. ForkJoin thread pool leaks. (Kihwal Lee via jing9)
 
+    HDFS-12578. TestDeadDatanode#testNonDFSUsedONDeadNodeReReg failing
+    in branch-2.7. (Ajay Kumar via Xiao Chen)
+
     HDFS-12832. INode.getFullPathName may throw ArrayIndexOutOfBoundsException
     lead to NameNode exit. (shv)
 
@@ -527,10 +537,6 @@ Release 2.7.3 - 2016-08-25
     HDFS-8845. DiskChecker should not traverse the entire tree (Chang Li via
     Colin P. McCabe)
 
-    HDFS-8829. Make SO_RCVBUF and SO_SNDBUF size configurable for
-    DataTransferProtocol sockets and allow configuring auto-tuning (He Tianyi
-    via Colin P. McCabe)
-
   BUG FIXES
 
     HDFS-9289. Make DataStreamer#block thread safe and verify genStamp in

http://git-wip-us.apache.org/repos/asf/hadoop/blob/110c5140/hadoop-mapreduce-project/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt
index d073d7c..30046e7 100644
--- a/hadoop-mapreduce-project/CHANGES.txt
+++ b/hadoop-mapreduce-project/CHANGES.txt
@@ -8,6 +8,11 @@ Release 2.7.5 - 2017-12-01
 
   IMPROVEMENTS
 
+    MAPREDUCE-6937. Backport MAPREDUCE-6870 to branch-2 while preserving
+    compatibility. (Peter Bacsko via Haibo Chen)
+
+    MAPREDUCE-6975. Logging task counters. (Prabhu Joseph via Naganarasimha)
+
   OPTIMIZATIONS
 
   BUG FIXES

http://git-wip-us.apache.org/repos/asf/hadoop/blob/110c5140/hadoop-yarn-project/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt
index 0b89924..4d25ba3 100644
--- a/hadoop-yarn-project/CHANGES.txt
+++ b/hadoop-yarn-project/CHANGES.txt
@@ -16,6 +16,12 @@ Release 2.7.5 - 2017-12-01
     APP_ATTEMPT_REMOVED event when async-scheduling enabled in
     CapacityScheduler. (sandflee and Jonathan Hung via jlowe)
 
+    YARN-6959. RM may allocate wrong AM Container for new attempt.
+    (Yuqi Wang via Jian He)
+
+    YARN-7084. TestSchedulingMonitor#testRMStarts fails sporadically.
+    (Jason Lowe)
+
 Release 2.7.4 - 2017-08-04
 
   INCOMPATIBLE CHANGES


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org


[4/4] hadoop git commit: Set the release date for 2.7.5-RC1

Posted by sh...@apache.org.
Set the release date for 2.7.5-RC1

Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/2c5c89fa
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/2c5c89fa
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/2c5c89fa

Branch: refs/heads/branch-2.7.5
Commit: 2c5c89fa48133bddf5e6453d738416f69694e067
Parents: 110c514
Author: Konstantin V Shvachko <sh...@apache.org>
Authored: Thu Dec 7 15:18:17 2017 -0800
Committer: Konstantin V Shvachko <sh...@apache.org>
Committed: Thu Dec 7 16:19:44 2017 -0800

----------------------------------------------------------------------
 hadoop-common-project/hadoop-common/CHANGES.txt              | 2 +-
 .../hadoop-common/src/main/docs/releasenotes.html            | 8 ++++++++
 hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt                  | 2 +-
 hadoop-mapreduce-project/CHANGES.txt                         | 2 +-
 hadoop-yarn-project/CHANGES.txt                              | 2 +-
 5 files changed, 12 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/2c5c89fa/hadoop-common-project/hadoop-common/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt
index 4ed85aa..898ab2f 100644
--- a/hadoop-common-project/hadoop-common/CHANGES.txt
+++ b/hadoop-common-project/hadoop-common/CHANGES.txt
@@ -1,6 +1,6 @@
 Hadoop Change Log
 
-Release 2.7.5 - 2017-12-01
+Release 2.7.5 - 2017-12-07
 
   INCOMPATIBLE CHANGES
 

http://git-wip-us.apache.org/repos/asf/hadoop/blob/2c5c89fa/hadoop-common-project/hadoop-common/src/main/docs/releasenotes.html
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/docs/releasenotes.html b/hadoop-common-project/hadoop-common/src/main/docs/releasenotes.html
index 7085e73..8bdfdfd 100644
--- a/hadoop-common-project/hadoop-common/src/main/docs/releasenotes.html
+++ b/hadoop-common-project/hadoop-common/src/main/docs/releasenotes.html
@@ -16,6 +16,10 @@ These release notes include new developer and user-facing incompatibilities, fea
      Major bug reported by Jason Lowe and fixed by Jason Lowe <br>
      <b>TestSchedulingMonitor#testRMStarts fails sporadically</b><br>
      <blockquote></blockquote></li>
+<li> <a href="https://issues.apache.org/jira/browse/YARN-6959">YARN-6959</a>.
+     Major bug reported by Yuqi Wang and fixed by Yuqi Wang (capacity scheduler , fairscheduler , scheduler)<br>
+     <b>RM may allocate wrong AM Container for new attempt</b><br>
+     <blockquote>ResourceManager will now record ResourceRequests from different attempts into different objects.</blockquote></li>
 <li> <a href="https://issues.apache.org/jira/browse/YARN-5195">YARN-5195</a>.
      Major bug reported by Karam Singh and fixed by sandflee (resourcemanager)<br>
      <b>RM intermittently crashed with NPE while handling APP_ATTEMPT_REMOVED event when async-scheduling enabled in CapacityScheduler</b><br>
@@ -40,6 +44,10 @@ These release notes include new developer and user-facing incompatibilities, fea
      Minor bug reported by Kihwal Lee and fixed by Kihwal Lee (test)<br>
      <b>TestHSAdminServer.testRefreshSuperUserGroups is failing</b><br>
      <blockquote></blockquote></li>
+<li> <a href="https://issues.apache.org/jira/browse/MAPREDUCE-6165">MAPREDUCE-6165</a>.
+     Minor bug reported by Wei Yan and fixed by Akira Ajisaka <br>
+     <b>[JDK8] TestCombineFileInputFormat failed on JDK8</b><br>
+     <blockquote></blockquote></li>
 <li> <a href="https://issues.apache.org/jira/browse/HDFS-12832">HDFS-12832</a>.
      Critical bug reported by DENG FEI and fixed by Konstantin Shvachko (namenode)<br>
      <b>INode.getFullPathName may throw ArrayIndexOutOfBoundsException lead to NameNode exit</b><br>

http://git-wip-us.apache.org/repos/asf/hadoop/blob/2c5c89fa/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index 5554477..02b05f1 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -1,6 +1,6 @@
 Hadoop HDFS Change Log
 
-Release 2.7.5 - 2017-12-01
+Release 2.7.5 - 2017-12-07
 
   INCOMPATIBLE CHANGES
 

http://git-wip-us.apache.org/repos/asf/hadoop/blob/2c5c89fa/hadoop-mapreduce-project/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt
index 30046e7..6e628c6 100644
--- a/hadoop-mapreduce-project/CHANGES.txt
+++ b/hadoop-mapreduce-project/CHANGES.txt
@@ -1,6 +1,6 @@
 Hadoop MapReduce Change Log
 
-Release 2.7.5 - 2017-12-01
+Release 2.7.5 - 2017-12-07
 
   INCOMPATIBLE CHANGES
 

http://git-wip-us.apache.org/repos/asf/hadoop/blob/2c5c89fa/hadoop-yarn-project/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt
index 4d25ba3..9618fa6 100644
--- a/hadoop-yarn-project/CHANGES.txt
+++ b/hadoop-yarn-project/CHANGES.txt
@@ -1,6 +1,6 @@
 Hadoop YARN Change Log
 
-Release 2.7.5 - 2017-12-01
+Release 2.7.5 - 2017-12-07
 
   INCOMPATIBLE CHANGES
 


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org