You are viewing a plain text version of this content. The canonical link for it is here.
Posted to hdfs-commits@hadoop.apache.org by to...@apache.org on 2012/01/24 00:42:17 UTC

svn commit: r1235067 - in /hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs: ./ src/main/java/org/apache/hadoop/hdfs/server/namenode/ src/test/java/org/apache/hadoop/hdfs/ src/test/java/org/apache/hadoop/hdfs/server/namenode/

Author: todd
Date: Mon Jan 23 23:42:17 2012
New Revision: 1235067

URL: http://svn.apache.org/viewvc?rev=1235067&view=rev
Log:
HDFS-2826. Add test case for HDFS-1476 (safemode can initialize replication queues before exiting) (todd)

Modified:
    hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
    hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
    hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestSafeMode.java
    hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NameNodeAdapter.java

Modified: hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt?rev=1235067&r1=1235066&r2=1235067&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt (original)
+++ hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt Mon Jan 23 23:42:17 2012
@@ -293,6 +293,9 @@ Release 0.23.1 - UNRELEASED
     HDFS-2825. Add test hook to turn off the writer preferring its local
     DN. (todd)
 
+    HDFS-2826. Add test case for HDFS-1476 (safemode can initialize
+    replication queues before exiting) (todd)
+
   BUG FIXES
 
     HDFS-2541. For a sufficiently large value of blocks, the DN Scanner 

Modified: hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java?rev=1235067&r1=1235066&r2=1235067&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java (original)
+++ hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java Mon Jan 23 23:42:17 2012
@@ -174,6 +174,8 @@ import org.mortbay.util.ajax.JSON;
 
 import com.google.common.base.Preconditions;
 
+import com.google.common.annotations.VisibleForTesting;
+
 /***************************************************
  * FSNamesystem does the actual bookkeeping work for the
  * DataNode.
@@ -2890,7 +2892,7 @@ public class FSNamesystem implements Nam
     /** Total number of blocks. */
     int blockTotal; 
     /** Number of safe blocks. */
-    private int blockSafe;
+    int blockSafe;
     /** Number of blocks needed to satisfy safe mode threshold condition */
     private int blockThreshold;
     /** Number of blocks needed before populating replication queues */
@@ -2898,7 +2900,7 @@ public class FSNamesystem implements Nam
     /** time of the last status printout */
     private long lastStatusReport = 0;
     /** flag indicating whether replication queues have been initialized */
-    private boolean initializedReplQueues = false;
+    boolean initializedReplQueues = false;
     /** Was safemode entered automatically because available resources were low. */
     private boolean resourcesLow = false;
     
@@ -3028,9 +3030,7 @@ public class FSNamesystem implements Nam
      */
     private synchronized void initializeReplQueues() {
       LOG.info("initializing replication queues");
-      if (isPopulatingReplQueues()) {
-        LOG.warn("Replication queues already initialized.");
-      }
+      assert !isPopulatingReplQueues() : "Already initialized repl queues";
       long startTimeMisReplicatedScan = now();
       blockManager.processMisReplicatedBlocks();
       initializedReplQueues = true;
@@ -4484,4 +4484,9 @@ public class FSNamesystem implements Nam
       byte[] password) throws InvalidToken {
     getDelegationTokenSecretManager().verifyToken(identifier, password);
   }
+
+  @VisibleForTesting
+  public SafeModeInfo getSafeModeInfoForTests() {
+    return safeMode;
+  }
 }

Modified: hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestSafeMode.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestSafeMode.java?rev=1235067&r1=1235066&r2=1235067&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestSafeMode.java (original)
+++ hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestSafeMode.java Mon Jan 23 23:42:17 2012
@@ -26,22 +26,29 @@ import org.apache.hadoop.fs.FSDataOutput
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.permission.FsPermission;
+import org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties;
 import org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction;
 import org.apache.hadoop.hdfs.server.blockmanagement.BlockManagerTestUtil;
 import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
 import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
+import org.apache.hadoop.hdfs.server.namenode.NameNode;
+import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
+import org.apache.hadoop.test.GenericTestUtils;
 
 import static org.junit.Assert.*;
 import org.junit.Before;
 import org.junit.After;
 import org.junit.Test;
 
+import com.google.common.base.Supplier;
 import com.google.common.collect.Lists;
 
 /**
  * Tests to verify safe mode correctness.
  */
 public class TestSafeMode {
+  private static final Path TEST_PATH = new Path("/test");
   private static final int BLOCK_SIZE = 1024;
   Configuration conf; 
   MiniDFSCluster cluster;
@@ -92,7 +99,7 @@ public class TestSafeMode {
     
     // create two files with one block each.
     DFSTestUtil.createFile(fs, file1, 1000, (short)1, 0);
-    DFSTestUtil.createFile(fs, file2, 2000, (short)1, 0);
+    DFSTestUtil.createFile(fs, file2, 1000, (short)1, 0);
     fs.close();
     cluster.shutdown();
     
@@ -136,6 +143,66 @@ public class TestSafeMode {
     String status = cluster.getNameNode().getNamesystem().getSafemode();
     assertEquals("", status);
   }
+  
+  /**
+   * Test that the NN initializes its under-replicated blocks queue
+   * before it is ready to exit safemode (HDFS-1476)
+   */
+  @Test(timeout=45000)
+  public void testInitializeReplQueuesEarly() throws Exception {
+    // Spray the blocks around the cluster when we add DNs instead of
+    // concentrating all blocks on the first node.
+    BlockManagerTestUtil.setWritingPrefersLocalNode(
+        cluster.getNamesystem().getBlockManager(), false);
+    
+    cluster.startDataNodes(conf, 2, true, StartupOption.REGULAR, null);
+    cluster.waitActive();
+    DFSTestUtil.createFile(fs, TEST_PATH, 15*BLOCK_SIZE, (short)1, 1L);
+    
+    
+    List<DataNodeProperties> dnprops = Lists.newLinkedList();
+    dnprops.add(cluster.stopDataNode(0));
+    dnprops.add(cluster.stopDataNode(0));
+    dnprops.add(cluster.stopDataNode(0));
+    
+    cluster.getConfiguration(0).setFloat(
+        DFSConfigKeys.DFS_NAMENODE_REPL_QUEUE_THRESHOLD_PCT_KEY, 1f/15f);
+    
+    cluster.restartNameNode();
+    final NameNode nn = cluster.getNameNode();
+    
+    String status = nn.getNamesystem().getSafemode();
+    assertEquals("Safe mode is ON.The reported blocks 0 needs additional " +
+        "15 blocks to reach the threshold 0.9990 of total blocks 15. " +
+        "Safe mode will be turned off automatically.", status);
+    assertFalse("Mis-replicated block queues should not be initialized " +
+        "until threshold is crossed",
+        NameNodeAdapter.safeModeInitializedReplQueues(nn));
+    
+    cluster.restartDataNode(dnprops.remove(0));
+
+    // Wait for the block report from the restarted DN to come in.
+    GenericTestUtils.waitFor(new Supplier<Boolean>() {
+      @Override
+      public Boolean get() {
+        return NameNodeAdapter.getSafeModeSafeBlocks(nn) > 0;
+      }
+    }, 10, 10000);
+    // SafeMode is fine-grain synchronized, so the processMisReplicatedBlocks
+    // call is still going on at this point - wait until it's done by grabbing
+    // the lock.
+    nn.getNamesystem().writeLock();
+    nn.getNamesystem().writeUnlock();
+    int safe = NameNodeAdapter.getSafeModeSafeBlocks(nn);
+    assertTrue("Expected first block report to make some but not all blocks " +
+        "safe. Got: " + safe, safe >= 1 && safe < 15);
+    BlockManagerTestUtil.updateState(nn.getNamesystem().getBlockManager());
+    
+    assertTrue(NameNodeAdapter.safeModeInitializedReplQueues(nn));
+    assertEquals(15 - safe, nn.getNamesystem().getUnderReplicatedBlocks());
+    
+    cluster.restartDataNodes();
+  }
 
   /**
    * Test that, when under-replicated blocks are processed at the end of
@@ -290,4 +357,4 @@ public class TestSafeMode {
     assertEquals("", cluster.getNamesystem().getSafemode());
   }
 
-}
\ No newline at end of file
+}

Modified: hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NameNodeAdapter.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NameNodeAdapter.java?rev=1235067&r1=1235066&r2=1235067&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NameNodeAdapter.java (original)
+++ hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NameNodeAdapter.java Mon Jan 23 23:42:17 2012
@@ -24,6 +24,7 @@ import org.apache.hadoop.hdfs.protocol.L
 import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSecretManager;
 import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor;
 import org.apache.hadoop.hdfs.server.protocol.DatanodeCommand;
+import org.apache.hadoop.hdfs.server.namenode.FSNamesystem.SafeModeInfo;
 import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
 import org.apache.hadoop.ipc.Server;
 
@@ -97,4 +98,28 @@ public class NameNodeAdapter {
       ns.readUnlock();
     }
   }
+  
+  /**
+   * @return the number of blocks marked safe by safemode, or -1
+   * if safemode is not running.
+   */
+  public static int getSafeModeSafeBlocks(NameNode nn) {
+    SafeModeInfo smi = nn.getNamesystem().getSafeModeInfoForTests();
+    if (smi == null) {
+      return -1;
+    }
+    return smi.blockSafe;
+  }
+  
+  /**
+   * @return true if safemode is not running, or if safemode has already
+   * initialized the replication queues
+   */
+  public static boolean safeModeInitializedReplQueues(NameNode nn) {
+    SafeModeInfo smi = nn.getNamesystem().getSafeModeInfoForTests();
+    if (smi == null) {
+      return true;
+    }
+    return smi.initializedReplQueues;
+  }
 }