Posted to common-commits@hadoop.apache.org by wh...@apache.org on 2015/06/25 03:08:10 UTC

[37/50] [abbrv] hadoop git commit: HDFS-6440. Support more than 2 NameNodes. Contributed by Jesse Yates.

http://git-wip-us.apache.org/repos/asf/hadoop/blob/49dfad94/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAConfiguration.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAConfiguration.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAConfiguration.java
index c4a2988..62643ae 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAConfiguration.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAConfiguration.java
@@ -23,10 +23,12 @@ import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.fail;
 
 import java.io.IOException;
+import java.net.MalformedURLException;
 import java.net.URI;
 import java.net.URL;
 import java.util.Collection;
 
+import com.google.common.base.Joiner;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hdfs.DFSConfigKeys;
 import org.apache.hadoop.hdfs.DFSUtil;
@@ -58,19 +60,23 @@ public class TestHAConfiguration {
     }
   }
 
-  private Configuration getHAConf(String nsId, String host1, String host2) {
+  private Configuration getHAConf(String nsId, String ... hosts) {
     Configuration conf = new Configuration();
     conf.set(DFSConfigKeys.DFS_NAMESERVICES, nsId);
-    conf.set(DFSUtil.addKeySuffixes(
-        DFSConfigKeys.DFS_HA_NAMENODES_KEY_PREFIX, nsId),
-        "nn1,nn2");    
     conf.set(DFSConfigKeys.DFS_HA_NAMENODE_ID_KEY, "nn1");
+
+    String[] nnids = new String[hosts.length];
+    for (int i = 0; i < hosts.length; i++) {
+      String nnid = "nn" + (i + 1);
+      nnids[i] = nnid;
+      conf.set(DFSUtil.addKeySuffixes(
+              DFSConfigKeys.DFS_NAMENODE_RPC_ADDRESS_KEY, nsId, nnid),
+          hosts[i] + ":12345");
+    }
+
     conf.set(DFSUtil.addKeySuffixes(
-        DFSConfigKeys.DFS_NAMENODE_RPC_ADDRESS_KEY, nsId, "nn1"),
-        host1 + ":12345");
-    conf.set(DFSUtil.addKeySuffixes(
-        DFSConfigKeys.DFS_NAMENODE_RPC_ADDRESS_KEY, nsId, "nn2"),
-        host2 + ":12345");
+            DFSConfigKeys.DFS_HA_NAMENODES_KEY_PREFIX, nsId),
+        Joiner.on(',').join(nnids));
     return conf;
   }
 
@@ -87,11 +93,28 @@ public class TestHAConfiguration {
     // 0.0.0.0, it should substitute the address from the RPC configuration
     // above.
     StandbyCheckpointer checkpointer = new StandbyCheckpointer(conf, fsn);
-    assertEquals(new URL("http", "1.2.3.2",
-        DFSConfigKeys.DFS_NAMENODE_HTTP_PORT_DEFAULT, ""),
-        checkpointer.getActiveNNAddress());
+    assertAddressMatches("1.2.3.2", checkpointer.getActiveNNAddresses().get(0));
+
+    // Test when there are three NNs
+    // Use non-local addresses to avoid host address matching
+    conf = getHAConf("ns1", "1.2.3.1", "1.2.3.2", "1.2.3.3");
+
+    // This is done by the NN before the StandbyCheckpointer is created
+    NameNode.initializeGenericKeys(conf, "ns1", "nn1");
+
+    checkpointer = new StandbyCheckpointer(conf, fsn);
+    assertEquals("Got an unexpected number of possible active NNs", 2, checkpointer
+        .getActiveNNAddresses().size());
+    assertEquals(new URL("http", "1.2.3.2", DFSConfigKeys.DFS_NAMENODE_HTTP_PORT_DEFAULT, ""),
+        checkpointer.getActiveNNAddresses().get(0));
+    assertAddressMatches("1.2.3.2", checkpointer.getActiveNNAddresses().get(0));
+    assertAddressMatches("1.2.3.3", checkpointer.getActiveNNAddresses().get(1));
   }
-  
+
+  private void assertAddressMatches(String address, URL url) throws MalformedURLException {
+    assertEquals(new URL("http", address, DFSConfigKeys.DFS_NAMENODE_HTTP_PORT_DEFAULT, ""), url);
+  }
+
   /**
    * Tests that the namenode edits dirs and shared edits dirs are gotten with
    * duplicates removed

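As context for the reworked getHAConf(String nsId, String... hosts) above, the following standalone sketch (not part of the patch) mirrors its loop and prints the per-NameNode configuration keys it produces for three hosts. The buildHAConf name and the plain java.util.Map standing in for Configuration are illustrative assumptions; the literal property strings follow the standard HDFS HA key names that the DFSConfigKeys constants resolve to.

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class HAConfSketch {
  // Mirrors TestHAConfiguration#getHAConf: one nnN id and one RPC-address key per host.
  static Map<String, String> buildHAConf(String nsId, String... hosts) {
    Map<String, String> conf = new HashMap<>();
    conf.put("dfs.nameservices", nsId);
    conf.put("dfs.ha.namenode.id", "nn1");
    List<String> nnids = new ArrayList<>();
    for (int i = 0; i < hosts.length; i++) {
      String nnid = "nn" + (i + 1);                                   // nn1, nn2, nn3, ...
      nnids.add(nnid);
      // dfs.namenode.rpc-address.<nameservice>.<nnid> = <host>:12345
      conf.put("dfs.namenode.rpc-address." + nsId + "." + nnid, hosts[i] + ":12345");
    }
    // dfs.ha.namenodes.<nameservice> lists every NN id, comma separated
    conf.put("dfs.ha.namenodes." + nsId, String.join(",", nnids));
    return conf;
  }

  public static void main(String[] args) {
    buildHAConf("ns1", "1.2.3.1", "1.2.3.2", "1.2.3.3")
        .forEach((key, value) -> System.out.println(key + " = " + value));
  }
}
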
http://git-wip-us.apache.org/repos/asf/hadoop/blob/49dfad94/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestPipelinesFailover.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestPipelinesFailover.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestPipelinesFailover.java
index 76a62ff..3da37f5 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestPipelinesFailover.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestPipelinesFailover.java
@@ -24,6 +24,7 @@ import static org.junit.Assert.fail;
 
 import java.io.IOException;
 import java.security.PrivilegedExceptionAction;
+import java.util.Random;
 import java.util.concurrent.TimeoutException;
 
 import org.apache.commons.logging.Log;
@@ -81,24 +82,33 @@ public class TestPipelinesFailover {
   
   private static final int STRESS_NUM_THREADS = 25;
   private static final int STRESS_RUNTIME = 40000;
-  
+
+  private static final int NN_COUNT = 3;
+  private static final long FAILOVER_SEED = System.currentTimeMillis();
+  private static final Random failoverRandom = new Random(FAILOVER_SEED);
+  static {
+    // log the failover seed so we can reproduce the test exactly
+    LOG.info("Using random seed: " + FAILOVER_SEED
+        + " for selecting active target NN during failover");
+  }
+
   enum TestScenario {
     GRACEFUL_FAILOVER {
       @Override
-      void run(MiniDFSCluster cluster) throws IOException {
-        cluster.transitionToStandby(0);
-        cluster.transitionToActive(1);
+      void run(MiniDFSCluster cluster, int previousActive, int activeIndex) throws IOException {
+        cluster.transitionToStandby(previousActive);
+        cluster.transitionToActive(activeIndex);
       }
     },
     ORIGINAL_ACTIVE_CRASHED {
       @Override
-      void run(MiniDFSCluster cluster) throws IOException {
-        cluster.restartNameNode(0);
-        cluster.transitionToActive(1);
+      void run(MiniDFSCluster cluster, int previousActive, int activeIndex) throws IOException {
+        cluster.restartNameNode(previousActive);
+        cluster.transitionToActive(activeIndex);
       }
     };
 
-    abstract void run(MiniDFSCluster cluster) throws IOException;
+    abstract void run(MiniDFSCluster cluster, int previousActive, int activeIndex) throws IOException;
   }
   
   enum MethodToTestIdempotence {
@@ -135,10 +145,7 @@ public class TestPipelinesFailover {
     conf.setInt(DFSConfigKeys.DFS_NAMENODE_REPLICATION_INTERVAL_KEY, 1000);
     
     FSDataOutputStream stm = null;
-    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
-      .nnTopology(MiniDFSNNTopology.simpleHATopology())
-      .numDataNodes(3)
-      .build();
+    MiniDFSCluster cluster = newMiniCluster(conf, 3);
     try {
       int sizeWritten = 0;
       
@@ -157,15 +164,15 @@ public class TestPipelinesFailover {
       // Make sure all of the blocks are written out before failover.
       stm.hflush();
 
-      LOG.info("Failing over to NN 1");
-      scenario.run(cluster);
+      LOG.info("Failing over to another NN");
+      int activeIndex = failover(cluster, scenario);
 
       // NOTE: explicitly do *not* make any further metadata calls
       // to the NN here. The next IPC call should be to allocate the next
       // block. Any other call would notice the failover and not test
       // idempotence of the operation (HDFS-3031)
       
-      FSNamesystem ns1 = cluster.getNameNode(1).getNamesystem();
+      FSNamesystem ns1 = cluster.getNameNode(activeIndex).getNamesystem();
       BlockManagerTestUtil.updateState(ns1.getBlockManager());
       assertEquals(0, ns1.getPendingReplicationBlocks());
       assertEquals(0, ns1.getCorruptReplicaBlocks());
@@ -213,10 +220,7 @@ public class TestPipelinesFailover {
     conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);
     
     FSDataOutputStream stm = null;
-    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
-      .nnTopology(MiniDFSNNTopology.simpleHATopology())
-      .numDataNodes(5)
-      .build();
+    MiniDFSCluster cluster = newMiniCluster(conf, 5);
     try {
       cluster.waitActive();
       cluster.transitionToActive(0);
@@ -232,8 +236,7 @@ public class TestPipelinesFailover {
       // Make sure all the blocks are written before failover
       stm.hflush();
 
-      LOG.info("Failing over to NN 1");
-      scenario.run(cluster);
+      int nextActive = failover(cluster, scenario);
 
       assertTrue(fs.exists(TEST_PATH));
       
@@ -242,9 +245,9 @@ public class TestPipelinesFailover {
       // write another block and a half
       AppendTestUtil.write(stm, BLOCK_AND_A_HALF, BLOCK_AND_A_HALF);
       stm.hflush();
-      
-      LOG.info("Failing back to NN 0");
-      cluster.transitionToStandby(1);
+
+      LOG.info("Failing back from NN " + nextActive + " to NN 0");
+      cluster.transitionToStandby(nextActive);
       cluster.transitionToActive(0);
       
       cluster.stopDataNode(1);
@@ -262,7 +265,7 @@ public class TestPipelinesFailover {
       cluster.shutdown();
     }
   }
-  
+
   /**
    * Tests lease recovery if a client crashes. This approximates the
    * use case of HBase WALs being recovered after a NN failover.
@@ -275,10 +278,7 @@ public class TestPipelinesFailover {
     conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);
     
     FSDataOutputStream stm = null;
-    final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
-      .nnTopology(MiniDFSNNTopology.simpleHATopology())
-      .numDataNodes(3)
-      .build();
+    final MiniDFSCluster cluster = newMiniCluster(conf, 3);
     try {
       cluster.waitActive();
       cluster.transitionToActive(0);
@@ -329,10 +329,7 @@ public class TestPipelinesFailover {
     conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);
     
     FSDataOutputStream stm = null;
-    final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
-      .nnTopology(MiniDFSNNTopology.simpleHATopology())
-      .numDataNodes(3)
-      .build();
+    final MiniDFSCluster cluster = newMiniCluster(conf, 3);
     try {
       cluster.waitActive();
       cluster.transitionToActive(0);
@@ -406,7 +403,20 @@ public class TestPipelinesFailover {
       cluster.shutdown();
     }
   }
-  
+
+  /**
+   * Create a MiniCluster with the specified base configuration and the specified number of
+   * DataNodes. Helper method to ensure that we use the same number of NNs across all the tests.
+   * @return mini cluster ready to use
+   * @throws IOException if the cluster cannot be started
+   */
+  private MiniDFSCluster newMiniCluster(Configuration conf, int dnCount) throws IOException {
+    return new MiniDFSCluster.Builder(conf)
+             .nnTopology(MiniDFSNNTopology.simpleHATopology(NN_COUNT))
+             .numDataNodes(dnCount)
+             .build();
+  }
+
   /**
    * Stress test for pipeline/lease recovery. Starts a number of
    * threads, each of which creates a file and has another client
@@ -485,6 +495,38 @@ public class TestPipelinesFailover {
   }
 
   /**
+   * Fail over using the given scenario, assuming NN0 is currently active.
+   * @param cluster cluster on which to run the scenario
+   * @param scenario failure scenario to run
+   * @return the index of the new active NN
+   * @throws IOException on failure
+   */
+  private int failover(MiniDFSCluster cluster, TestScenario scenario) throws IOException {
+    return failover(cluster, scenario, 0);
+  }
+
+  /**
+   * Do a fail-over with the given scenario.
+   * @param cluster cluster on which to run the scenario
+   * @param scenario failure scenario to run
+   * @param activeIndex index of the currently active node
+   * @throws IOException on failure
+   * @return the index of the new active NN
+   */
+  private int failover(MiniDFSCluster cluster, TestScenario scenario, int activeIndex)
+      throws IOException {
+    // get the index of the next node that should be active, ensuring it's not the same as the
+    // currently active node
+    int nextActive = failoverRandom.nextInt(NN_COUNT);
+    if (nextActive == activeIndex) {
+      nextActive = (nextActive + 1) % NN_COUNT;
+    }
+    LOG.info("Failing over to a standby NN: " + nextActive + " from NN " + activeIndex);
+    scenario.run(cluster, activeIndex, nextActive);
+    return nextActive;
+  }
+
+  /**
    * Test thread which creates a file, has another fake user recover
    * the lease on the file, and then ensures that the file's contents
    * are properly readable. If any of these steps fails, propagates

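The failover(...) helper added above picks the next active NN with a seeded Random so a failing run can be replayed from the logged seed. The self-contained sketch below (not part of the patch; the class name is illustrative) shows just that selection rule: draw a random index and, on a collision with the currently active NN, advance by one modulo NN_COUNT so the target always differs.

import java.util.Random;

public class FailoverTargetSketch {
  private static final int NN_COUNT = 3;
  private static final long SEED = System.currentTimeMillis(); // log this to reproduce a run

  public static void main(String[] args) {
    Random failoverRandom = new Random(SEED);
    int active = 0;
    for (int step = 0; step < 5; step++) {
      int next = failoverRandom.nextInt(NN_COUNT);
      if (next == active) {
        next = (next + 1) % NN_COUNT; // never pick the NN that is already active
      }
      System.out.println("step " + step + ": failing over from NN " + active + " to NN " + next);
      active = next;
    }
  }
}
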
http://git-wip-us.apache.org/repos/asf/hadoop/blob/49dfad94/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestRemoteNameNodeInfo.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestRemoteNameNodeInfo.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestRemoteNameNodeInfo.java
new file mode 100644
index 0000000..cb2a4fc
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestRemoteNameNodeInfo.java
@@ -0,0 +1,61 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.namenode.ha;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hdfs.DFSConfigKeys;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.hdfs.MiniDFSNNTopology;
+import org.junit.Test;
+
+import java.util.List;
+
+import static org.junit.Assert.assertEquals;
+
+/**
+ * Test that we correctly obtain remote namenode information
+ */
+public class TestRemoteNameNodeInfo {
+
+  @Test
+  public void testParseMultipleNameNodes() throws Exception {
+    // start with an empty configuration
+    Configuration conf = new Configuration(false);
+
+    // add in keys for each of the NNs
+    String nameservice = "ns1";
+    MiniDFSNNTopology topology = new MiniDFSNNTopology()
+        .addNameservice(new MiniDFSNNTopology.NSConf(nameservice)
+            .addNN(new MiniDFSNNTopology.NNConf("nn1").setIpcPort(10001))
+            .addNN(new MiniDFSNNTopology.NNConf("nn2").setIpcPort(10002))
+            .addNN(new MiniDFSNNTopology.NNConf("nn3").setIpcPort(10003)));
+
+    // add the configurations of the NNs to the passed conf, so we can parse it back out
+    MiniDFSCluster.configureNameNodes(topology, false, conf);
+
+    // set the 'local' one as nn1
+    conf.set(DFSConfigKeys.DFS_HA_NAMENODE_ID_KEY, "nn1");
+
+    List<RemoteNameNodeInfo> nns = RemoteNameNodeInfo.getRemoteNameNodes(conf);
+
+    // make sure it matches when we pass in the nameservice
+    List<RemoteNameNodeInfo> nns2 = RemoteNameNodeInfo.getRemoteNameNodes(conf,
+        nameservice);
+    assertEquals(nns, nns2);
+  }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hadoop/blob/49dfad94/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestSeveralNameNodes.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestSeveralNameNodes.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestSeveralNameNodes.java
new file mode 100644
index 0000000..dbe8070
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestSeveralNameNodes.java
@@ -0,0 +1,179 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.namenode.ha;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.test.MultithreadedTestUtil.RepeatingTestThread;
+import org.apache.hadoop.test.MultithreadedTestUtil.TestContext;
+import org.junit.Test;
+
+/**
+ * Test that we can start and run several namenodes on the same minicluster.
+ */
+public class TestSeveralNameNodes {
+
+  private static final Log LOG = LogFactory.getLog(TestSeveralNameNodes.class);
+
+  /** ms between failovers among the NNs */
+  private static final int TIME_BETWEEN_FAILOVERS = 200;
+  private static final int NUM_NAMENODES = 3;
+  private static final int NUM_THREADS = 3;
+  private static final int LIST_LENGTH = 50;
+  /** total length of the test, in ms */
+  private static final long RUNTIME = 100000;
+
+  @Test
+  public void testCircularLinkedListWrites() throws Exception {
+    HAStressTestHarness harness = new HAStressTestHarness();
+    // setup the harness
+    harness.setNumberOfNameNodes(NUM_NAMENODES);
+    harness.addFailoverThread(TIME_BETWEEN_FAILOVERS);
+
+    final MiniDFSCluster cluster = harness.startCluster();
+    try {
+      cluster.waitActive();
+      cluster.transitionToActive(0);
+
+      // set up the circular writers
+      FileSystem fs = harness.getFailoverFs();
+      TestContext context = harness.testCtx;
+      List<CircularWriter> writers = new ArrayList<CircularWriter>();
+      for (int i = 0; i < NUM_THREADS; i++) {
+        Path p = new Path("/test-" + i);
+        fs.mkdirs(p);
+        CircularWriter writer = new CircularWriter(context, LIST_LENGTH, fs, p);
+        writers.add(writer);
+        context.addThread(writer);
+      }
+      harness.startThreads();
+
+      // wait for all the writer threads to finish, or until we exceed the runtime
+      long start = System.currentTimeMillis();
+      while ((System.currentTimeMillis() - start) < RUNTIME) {
+        for (int i = 0; i < writers.size(); i++) {
+          CircularWriter writer = writers.get(i);
+          // remove the writer from the ones to check
+          if (writer.done.await(10, TimeUnit.MILLISECONDS)) {
+            writers.remove(i--);
+          }
+        }
+      }
+      assertEquals(
+          "Some writers didn't complete in expected runtime! Current writer state:"
+              + writers, 0,
+          writers.size());
+
+      harness.stopThreads();
+    } finally {
+      System.err.println("===========================\n\n\n\n");
+      harness.shutdown();
+    }
+  }
+
+  private static class CircularWriter extends RepeatingTestThread {
+
+    private final int maxLength;
+    private final Path dir;
+    private final FileSystem fs;
+    private int currentListIndex = 0;
+    private CountDownLatch done = new CountDownLatch(1);
+
+    public CircularWriter(TestContext context, int listLength, FileSystem fs,
+        Path parentDir) {
+      super(context);
+      this.fs = fs;
+      this.maxLength = listLength;
+      this.dir = parentDir;
+    }
+
+    @Override
+    public String toString() {
+      StringBuilder builder = new StringBuilder("Circular Writer:\n");
+      builder.append("\t directory: " + dir + "\n");
+      builder.append("\t target length: " + maxLength + "\n");
+      // might be a little racy, but we just want a close count
+      builder.append("\t current item: " + currentListIndex + "\n");
+      builder.append("\t done: " + (done.getCount() == 0) + "\n");
+      return builder.toString();
+    }
+
+    @Override
+    public void doAnAction() throws Exception {
+      if (currentListIndex == maxLength) {
+        checkList();
+        this.stopTestThread();
+        done.countDown();
+      } else {
+        writeList();
+      }
+    }
+
+    /**
+     * Make sure we can traverse the entire linked list
+     */
+    private void checkList() throws IOException {
+      for (int i = 0; i < maxLength; i++) {
+        Path nextFile = getNextFile(i);
+        if (!fs.exists(nextFile)) {
+          throw new RuntimeException("Next file " + nextFile
+              + " for list does not exist!");
+        }
+        // read the index of the next file in the list
+        FSDataInputStream in = fs.open(nextFile);
+        nextFile = getNextFile(in.read());
+        in.close();
+      }
+
+    }
+
+    private void cleanup() throws IOException {
+      if (!fs.delete(dir, true)) {
+        throw new RuntimeException("Didn't correctly delete " + dir);
+      }
+      if (!fs.mkdirs(dir)) {
+        throw new RuntimeException("Didn't correctly make directory " + dir);
+      }
+    }
+
+    private void writeList() throws IOException {
+      Path nextPath = getNextFile(currentListIndex++);
+      LOG.info("Writing next file: " + nextPath);
+      FSDataOutputStream file = fs.create(nextPath);
+      file.write(currentListIndex);
+      file.close();
+    }
+
+    private Path getNextFile(int i) {
+      return new Path(dir, Integer.toString(i));
+    }
+  }
+}
\ No newline at end of file

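The CircularWriter above builds a linked list of HDFS files, where file i stores the index of file i+1, and then walks the chain to verify that nothing was lost across failovers. Below is a minimal local-filesystem sketch of that layout (not part of the patch; it uses java.nio on a temp directory instead of HDFS purely for illustration).

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;

public class LinkedFileListSketch {
  public static void main(String[] args) throws IOException {
    Path dir = Files.createTempDirectory("linked-file-list");
    int listLength = 5;

    // write: file "i" holds a single byte naming the next file in the list
    for (int i = 0; i < listLength; i++) {
      Files.write(dir.resolve(Integer.toString(i)), new byte[] { (byte) (i + 1) });
    }

    // check: follow the chain from file 0 and make sure every link exists
    int next = 0;
    for (int i = 0; i < listLength; i++) {
      Path file = dir.resolve(Integer.toString(next));
      if (!Files.exists(file)) {
        throw new IllegalStateException("Broken link at " + file);
      }
      next = Files.readAllBytes(file)[0];
    }
    System.out.println("Traversed " + listLength + " links under " + dir);
  }
}
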
http://git-wip-us.apache.org/repos/asf/hadoop/blob/49dfad94/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java
index 33af0e2..cd32502 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java
@@ -63,8 +63,9 @@ import static org.junit.Assert.*;
 
 public class TestStandbyCheckpoints {
   private static final int NUM_DIRS_IN_LOG = 200000;
+  protected static int NUM_NNS = 3;
   protected MiniDFSCluster cluster;
-  protected NameNode nn0, nn1;
+  protected NameNode[] nns = new NameNode[NUM_NNS];
   protected FileSystem fs;
   private final Random random = new Random();
   protected File tmpOivImgDir;
@@ -88,7 +89,8 @@ public class TestStandbyCheckpoints {
         MiniDFSNNTopology topology = new MiniDFSNNTopology()
             .addNameservice(new MiniDFSNNTopology.NSConf("ns1")
                 .addNN(new MiniDFSNNTopology.NNConf("nn1").setHttpPort(basePort))
-                .addNN(new MiniDFSNNTopology.NNConf("nn2").setHttpPort(basePort + 1)));
+                .addNN(new MiniDFSNNTopology.NNConf("nn2").setHttpPort(basePort + 1))
+                .addNN(new MiniDFSNNTopology.NNConf("nn3").setHttpPort(basePort + 2)));
 
         cluster = new MiniDFSCluster.Builder(conf)
             .nnTopology(topology)
@@ -96,8 +98,8 @@ public class TestStandbyCheckpoints {
             .build();
         cluster.waitActive();
 
-        nn0 = cluster.getNameNode(0);
-        nn1 = cluster.getNameNode(1);
+        setNNs();
+
         fs = HATestUtil.configureFailoverFs(cluster, conf);
 
         cluster.transitionToActive(0);
@@ -110,6 +112,12 @@ public class TestStandbyCheckpoints {
     }
   }
 
+  protected void setNNs(){
+    for (int i = 0; i < NUM_NNS; i++) {
+      nns[i] = cluster.getNameNode(i);
+    }
+  }
+
   protected Configuration setupCommonConfig() {
     tmpOivImgDir = Files.createTempDir();
 
@@ -136,10 +144,10 @@ public class TestStandbyCheckpoints {
 
   @Test(timeout = 300000)
   public void testSBNCheckpoints() throws Exception {
-    JournalSet standbyJournalSet = NameNodeAdapter.spyOnJournalSet(nn1);
-    
+    JournalSet standbyJournalSet = NameNodeAdapter.spyOnJournalSet(nns[1]);
+
     doEdits(0, 10);
-    HATestUtil.waitForStandbyToCatchUp(nn0, nn1);
+    HATestUtil.waitForStandbyToCatchUp(nns[0], nns[1]);
     // Once the standby catches up, it should notice that it needs to
     // do a checkpoint and save one to its local directories.
     HATestUtil.waitForCheckpoint(cluster, 1, ImmutableList.of(12));
@@ -147,10 +155,9 @@ public class TestStandbyCheckpoints {
     GenericTestUtils.waitFor(new Supplier<Boolean>() {
       @Override
       public Boolean get() {
-        if(tmpOivImgDir.list().length > 0) {
+        if (tmpOivImgDir.list().length > 0) {
           return true;
-        }
-        else {
+        } else {
           return false;
         }
       }
@@ -189,9 +196,9 @@ public class TestStandbyCheckpoints {
     HATestUtil.waitForCheckpoint(cluster, 1, ImmutableList.of(12));
     HATestUtil.waitForCheckpoint(cluster, 0, ImmutableList.of(12));
     
-    assertEquals(12, nn0.getNamesystem().getFSImage()
+    assertEquals(12, nns[0].getNamesystem().getFSImage()
         .getMostRecentCheckpointTxId());
-    assertEquals(12, nn1.getNamesystem().getFSImage()
+    assertEquals(12, nns[1].getNamesystem().getFSImage()
         .getMostRecentCheckpointTxId());
     
     List<File> dirs = Lists.newArrayList();
@@ -214,17 +221,17 @@ public class TestStandbyCheckpoints {
     cluster.getConfiguration(1).setInt(
         DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_PERIOD_KEY, 0);
     cluster.restartNameNode(1);
-    nn1 = cluster.getNameNode(1);
- 
-    FSImage spyImage1 = NameNodeAdapter.spyOnFsImage(nn1);
-    
+    nns[1] = cluster.getNameNode(1);
+
+    FSImage spyImage1 = NameNodeAdapter.spyOnFsImage(nns[1]);
+
     // We shouldn't save any checkpoints at txid=0
     Thread.sleep(1000);
     Mockito.verify(spyImage1, Mockito.never())
       .saveNamespace((FSNamesystem) Mockito.anyObject());
  
     // Roll the primary and wait for the standby to catch up
-    HATestUtil.waitForStandbyToCatchUp(nn0, nn1);
+    HATestUtil.waitForStandbyToCatchUp(nns[0], nns[1]);
     Thread.sleep(2000);
     
     // We should make exactly one checkpoint at this new txid. 
@@ -259,7 +266,7 @@ public class TestStandbyCheckpoints {
     cluster.getConfiguration(1).setInt(
         DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_PERIOD_KEY, 0);
     cluster.restartNameNode(1);
-    nn1 = cluster.getNameNode(1);
+    nns[1] = cluster.getNameNode(1);
 
     cluster.transitionToActive(0);    
     
@@ -284,31 +291,42 @@ public class TestStandbyCheckpoints {
   @Test(timeout=60000)
   public void testCheckpointCancellationDuringUpload() throws Exception {
     // don't compress, we want a big image
-    cluster.getConfiguration(0).setBoolean(
-        DFSConfigKeys.DFS_IMAGE_COMPRESS_KEY, false);
-    cluster.getConfiguration(1).setBoolean(
-        DFSConfigKeys.DFS_IMAGE_COMPRESS_KEY, false);
+    for (int i = 0; i < NUM_NNS; i++) {
+      cluster.getConfiguration(i).setBoolean(
+          DFSConfigKeys.DFS_IMAGE_COMPRESS_KEY, false);
+    }
+
     // Throttle SBN upload to make it hang during upload to ANN
-    cluster.getConfiguration(1).setLong(
-        DFSConfigKeys.DFS_IMAGE_TRANSFER_RATE_KEY, 100);
-    cluster.restartNameNode(0);
-    cluster.restartNameNode(1);
-    nn0 = cluster.getNameNode(0);
-    nn1 = cluster.getNameNode(1);
+    for (int i = 1; i < NUM_NNS; i++) {
+      cluster.getConfiguration(i).setLong(
+          DFSConfigKeys.DFS_IMAGE_TRANSFER_RATE_KEY, 100);
+    }
+    for (int i = 0; i < NUM_NNS; i++) {
+      cluster.restartNameNode(i);
+    }
+
+    // update references to each of the nns
+    setNNs();
 
     cluster.transitionToActive(0);
 
     doEdits(0, 100);
-    HATestUtil.waitForStandbyToCatchUp(nn0, nn1);
-    HATestUtil.waitForCheckpoint(cluster, 1, ImmutableList.of(104));
+
+    for (int i = 1; i < NUM_NNS; i++) {
+      HATestUtil.waitForStandbyToCatchUp(nns[0], nns[i]);
+      HATestUtil.waitForCheckpoint(cluster, i, ImmutableList.of(104));
+    }
+
     cluster.transitionToStandby(0);
     cluster.transitionToActive(1);
 
+
     // Wait to make sure background TransferFsImageUpload thread was cancelled.
     // This needs to be done before the next test in the suite starts, so that a
     // file descriptor is not held open during the next cluster init.
     cluster.shutdown();
     cluster = null;
+
     GenericTestUtils.waitFor(new Supplier<Boolean>() {
       @Override
       public Boolean get() {
@@ -325,7 +343,7 @@ public class TestStandbyCheckpoints {
     }, 1000, 30000);
 
     // Assert that former active did not accept the canceled checkpoint file.
-    assertEquals(0, nn0.getFSImage().getMostRecentCheckpointTxId());
+    assertEquals(0, nns[0].getFSImage().getMostRecentCheckpointTxId());
   }
   
   /**
@@ -337,7 +355,7 @@ public class TestStandbyCheckpoints {
   public void testStandbyExceptionThrownDuringCheckpoint() throws Exception {
     
     // Set it up so that we know when the SBN checkpoint starts and ends.
-    FSImage spyImage1 = NameNodeAdapter.spyOnFsImage(nn1);
+    FSImage spyImage1 = NameNodeAdapter.spyOnFsImage(nns[1]);
     DelayAnswer answerer = new DelayAnswer(LOG);
     Mockito.doAnswer(answerer).when(spyImage1)
         .saveNamespace(Mockito.any(FSNamesystem.class),
@@ -345,7 +363,7 @@ public class TestStandbyCheckpoints {
 
     // Perform some edits and wait for a checkpoint to start on the SBN.
     doEdits(0, 1000);
-    nn0.getRpcServer().rollEditLog();
+    nns[0].getRpcServer().rollEditLog();
     answerer.waitForCall();
     assertTrue("SBN is not performing checkpoint but it should be.",
         answerer.getFireCount() == 1 && answerer.getResultCount() == 0);
@@ -355,7 +373,7 @@ public class TestStandbyCheckpoints {
     ThreadUtil.sleepAtLeastIgnoreInterrupts(1000);
     try {
       // Perform an RPC to the SBN and make sure it throws a StandbyException.
-      nn1.getRpcServer().getFileInfo("/");
+      nns[1].getRpcServer().getFileInfo("/");
       fail("Should have thrown StandbyException, but instead succeeded.");
     } catch (StandbyException se) {
       GenericTestUtils.assertExceptionContains("is not supported", se);
@@ -382,7 +400,7 @@ public class TestStandbyCheckpoints {
   public void testReadsAllowedDuringCheckpoint() throws Exception {
     
     // Set it up so that we know when the SBN checkpoint starts and ends.
-    FSImage spyImage1 = NameNodeAdapter.spyOnFsImage(nn1);
+    FSImage spyImage1 = NameNodeAdapter.spyOnFsImage(nns[1]);
     DelayAnswer answerer = new DelayAnswer(LOG);
     Mockito.doAnswer(answerer).when(spyImage1)
         .saveNamespace(Mockito.any(FSNamesystem.class),
@@ -391,7 +409,7 @@ public class TestStandbyCheckpoints {
     
     // Perform some edits and wait for a checkpoint to start on the SBN.
     doEdits(0, 1000);
-    nn0.getRpcServer().rollEditLog();
+    nns[0].getRpcServer().rollEditLog();
     answerer.waitForCall();
     assertTrue("SBN is not performing checkpoint but it should be.",
         answerer.getFireCount() == 1 && answerer.getResultCount() == 0);
@@ -405,7 +423,7 @@ public class TestStandbyCheckpoints {
       @Override
       public void run() {
         try {
-          nn1.getRpcServer().restoreFailedStorage("false");
+          nns[1].getRpcServer().restoreFailedStorage("false");
         } catch (IOException e) {
           e.printStackTrace();
         }
@@ -415,16 +433,16 @@ public class TestStandbyCheckpoints {
     
     // Make sure that our thread is waiting for the lock.
     ThreadUtil.sleepAtLeastIgnoreInterrupts(1000);
-    
-    assertFalse(nn1.getNamesystem().getFsLockForTests().hasQueuedThreads());
-    assertFalse(nn1.getNamesystem().getFsLockForTests().isWriteLocked());
-    assertTrue(nn1.getNamesystem().getCpLockForTests().hasQueuedThreads());
-    
+
+    assertFalse(nns[1].getNamesystem().getFsLockForTests().hasQueuedThreads());
+    assertFalse(nns[1].getNamesystem().getFsLockForTests().isWriteLocked());
+    assertTrue(nns[1].getNamesystem().getCpLockForTests().hasQueuedThreads());
+
     // Get /jmx of the standby NN web UI, which will cause the FSNS read lock to
     // be taken.
     String pageContents = DFSTestUtil.urlGet(new URL("http://" +
-        nn1.getHttpAddress().getHostName() + ":" +
-        nn1.getHttpAddress().getPort() + "/jmx"));
+        nns[1].getHttpAddress().getHostName() + ":" +
+        nns[1].getHttpAddress().getPort() + "/jmx"));
     assertTrue(pageContents.contains("NumLiveDataNodes"));
     
     // Make sure that the checkpoint is still going on, implying that the client

http://git-wip-us.apache.org/repos/asf/hadoop/blob/49dfad94/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/hadoop-0.23-reserved.tgz
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/hadoop-0.23-reserved.tgz b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/hadoop-0.23-reserved.tgz
index 0f53f2a..abc7bbd 100644
Binary files a/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/hadoop-0.23-reserved.tgz and b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/hadoop-0.23-reserved.tgz differ

http://git-wip-us.apache.org/repos/asf/hadoop/blob/49dfad94/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/hadoop-1-reserved.tgz
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/hadoop-1-reserved.tgz b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/hadoop-1-reserved.tgz
index 737ad2d..b3f8b9d 100644
Binary files a/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/hadoop-1-reserved.tgz and b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/hadoop-1-reserved.tgz differ

http://git-wip-us.apache.org/repos/asf/hadoop/blob/49dfad94/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/hadoop-2-reserved.tgz
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/hadoop-2-reserved.tgz b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/hadoop-2-reserved.tgz
index 3cb2ee6..2256fba 100644
Binary files a/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/hadoop-2-reserved.tgz and b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/hadoop-2-reserved.tgz differ

http://git-wip-us.apache.org/repos/asf/hadoop/blob/49dfad94/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/hadoop-22-dfs-dir.tgz
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/hadoop-22-dfs-dir.tgz b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/hadoop-22-dfs-dir.tgz
index b69741c..c4959b4 100644
Binary files a/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/hadoop-22-dfs-dir.tgz and b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/hadoop-22-dfs-dir.tgz differ

http://git-wip-us.apache.org/repos/asf/hadoop/blob/49dfad94/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/hadoop1-bbw.tgz
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/hadoop1-bbw.tgz b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/hadoop1-bbw.tgz
index 2574f8b..e7d3fbd 100644
Binary files a/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/hadoop1-bbw.tgz and b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/hadoop1-bbw.tgz differ

http://git-wip-us.apache.org/repos/asf/hadoop/blob/49dfad94/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/log4j.properties
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/log4j.properties b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/log4j.properties
index ef3e249..c671ccc 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/log4j.properties
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/log4j.properties
@@ -20,4 +20,4 @@ log4j.rootLogger=info,stdout
 log4j.threshold=ALL
 log4j.appender.stdout=org.apache.log4j.ConsoleAppender
 log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
-log4j.appender.stdout.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n
+log4j.appender.stdout.layout.ConversionPattern=%d{ISO8601} [%t] %-5p %c{2} (%F:%M(%L)) - %m%n