You are viewing a plain text version of this content. The canonical link for it is here.
Posted to hdfs-commits@hadoop.apache.org by ki...@apache.org on 2013/07/01 22:32:36 UTC

svn commit: r1498672 - in /hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs: ./ src/main/java/org/apache/hadoop/hdfs/server/namenode/ src/test/java/org/apache/hadoop/hdfs/ src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/

Author: kihwal
Date: Mon Jul  1 20:32:36 2013
New Revision: 1498672

URL: http://svn.apache.org/r1498672
Log:
svn merge -c 1498665 Merging from trunk to branch-2 to fix HDFS-4888.

Modified:
    hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
    hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
    hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestSafeMode.java
    hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java

Modified: hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt?rev=1498672&r1=1498671&r2=1498672&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt (original)
+++ hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt Mon Jul  1 20:32:36 2013
@@ -434,6 +434,9 @@ Release 2.1.0-beta - 2013-07-02
     HDFS-4944. WebHDFS cannot create a file path containing characters that must
     be URI-encoded, such as space. (cnauroth)
 
+    HDFS-4888. Refactor and fix FSNamesystem.getTurnOffTip. (Ravi Prakash via
+    kihwal)
+
   BREAKDOWN OF HDFS-347 SUBTASKS AND RELATED JIRAS
 
     HDFS-4353. Encapsulate connections to peers in Peer and PeerServer classes.

Modified: hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java?rev=1498672&r1=1498671&r2=1498672&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java (original)
+++ hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java Mon Jul  1 20:32:36 2013
@@ -4014,9 +4014,9 @@ public class FSNamesystem implements Nam
       
     // internal fields
     /** Time when threshold was reached.
-     * 
-     * <br>-1 safe mode is off
-     * <br> 0 safe mode is on, but threshold is not reached yet 
+     * <br> -1 safe mode is off
+     * <br> 0 safe mode is on, and threshold is not reached yet
+     * <br> &gt;0 safe mode is on, but we are in extension period
      */
     private long reached = -1;  
     /** Total number of blocks. */
@@ -4140,7 +4140,8 @@ public class FSNamesystem implements Nam
       NameNode.stateChangeLog.info("STATE* Leaving safe mode after " 
                                     + timeInSafemode/1000 + " secs");
       NameNode.getNameNodeMetrics().setSafeModeTime((int) timeInSafemode);
-      
+
+      //Log the following only once (when transitioning from ON -> OFF)
       if (reached >= 0) {
         NameNode.stateChangeLog.info("STATE* Safe mode is OFF"); 
       }
@@ -4321,62 +4322,56 @@ public class FSNamesystem implements Nam
      * A tip on how safe mode is to be turned off: manually or automatically.
      */
     String getTurnOffTip() {
-      if(reached < 0)
+      if(!isOn())
         return "Safe mode is OFF.";
-      String leaveMsg = "";
+
+      //Manual OR low-resource safemode. (Admin intervention required)
+      String leaveMsg = "It was turned on manually. ";
       if (areResourcesLow()) {
-        leaveMsg = "Resources are low on NN. " 
-        	+ "Please add or free up more resources then turn off safe mode manually.  "
-        	+ "NOTE:  If you turn off safe mode before adding resources, "
-        	+ "the NN will immediately return to safe mode.";
-      } else {
-        leaveMsg = "Safe mode will be turned off automatically";
+        leaveMsg = "Resources are low on NN. Please add or free up more "
+          + "resources then turn off safe mode manually. NOTE:  If you turn off"
+          + " safe mode before adding resources, "
+          + "the NN will immediately return to safe mode. ";
+      }
+      if (isManual() || areResourcesLow()) {
+        return leaveMsg
+          + "Use \"hdfs dfsadmin -safemode leave\" to turn safe mode off.";
       }
-      if(isManual() && !areResourcesLow()) {
-        leaveMsg = "Use \"hdfs dfsadmin -safemode leave\" to turn safe mode off";
-      }
-
-      if(blockTotal < 0)
-        return leaveMsg + ".";
 
+      //Automatic safemode. System will come out of safemode automatically.
+      leaveMsg = "Safe mode will be turned off automatically";
       int numLive = getNumLiveDataNodes();
       String msg = "";
       if (reached == 0) {
         if (blockSafe < blockThreshold) {
           msg += String.format(
             "The reported blocks %d needs additional %d"
-            + " blocks to reach the threshold %.4f of total blocks %d.",
+            + " blocks to reach the threshold %.4f of total blocks %d.\n",
             blockSafe, (blockThreshold - blockSafe) + 1, threshold, blockTotal);
         }
         if (numLive < datanodeThreshold) {
-          if (!"".equals(msg)) {
-            msg += "\n";
-          }
           msg += String.format(
             "The number of live datanodes %d needs an additional %d live "
-            + "datanodes to reach the minimum number %d.",
+            + "datanodes to reach the minimum number %d.\n",
             numLive, (datanodeThreshold - numLive), datanodeThreshold);
         }
-        msg += " " + leaveMsg;
       } else {
         msg = String.format("The reported blocks %d has reached the threshold"
-            + " %.4f of total blocks %d.", blockSafe, threshold, 
-            blockTotal);
+            + " %.4f of total blocks %d. ", blockSafe, threshold, blockTotal);
 
-        if (datanodeThreshold > 0) {
-          msg += String.format(" The number of live datanodes %d has reached "
-                               + "the minimum number %d.",
+        msg += String.format("The number of live datanodes %d has reached "
+                               + "the minimum number %d. ",
                                numLive, datanodeThreshold);
-        }
-        msg += " " + leaveMsg;
       }
+      msg += leaveMsg;
       // threshold is not reached or manual or resources low
       if(reached == 0 || (isManual() && !areResourcesLow())) {
-        return msg + ".";
+        return msg;
       }
       // extension period is in progress
-      return msg + " in " + Math.abs(reached + extension - now()) / 1000
-          + " seconds.";
+      return msg + (reached + extension - now() > 0 ?
+        " in " + (reached + extension - now()) / 1000 + " seconds."
+        : " soon.");
     }
 
     /**
@@ -5631,7 +5626,7 @@ public class FSNamesystem implements Nam
   public String getSafemode() {
     if (!this.isInSafeMode())
       return "";
-    return "Safe mode is ON." + this.getSafeModeTip();
+    return "Safe mode is ON. " + this.getSafeModeTip();
   }
 
   @Override // NameNodeMXBean

Modified: hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestSafeMode.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestSafeMode.java?rev=1498672&r1=1498671&r2=1498672&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestSafeMode.java (original)
+++ hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestSafeMode.java Mon Jul  1 20:32:36 2013
@@ -178,9 +178,9 @@ public class TestSafeMode {
     final NameNode nn = cluster.getNameNode();
     
     String status = nn.getNamesystem().getSafemode();
-    assertEquals("Safe mode is ON.The reported blocks 0 needs additional " +
-        "15 blocks to reach the threshold 0.9990 of total blocks 15. " +
-        "Safe mode will be turned off automatically.", status);
+    assertEquals("Safe mode is ON. The reported blocks 0 needs additional " +
+        "15 blocks to reach the threshold 0.9990 of total blocks 15.\n" +
+        "Safe mode will be turned off automatically", status);
     assertFalse("Mis-replicated block queues should not be initialized " +
         "until threshold is crossed",
         NameNodeAdapter.safeModeInitializedReplQueues(nn));
@@ -353,10 +353,10 @@ public class TestSafeMode {
     fs = cluster.getFileSystem();
 
     String tipMsg = cluster.getNamesystem().getSafemode();
-    assertTrue("Safemode tip message looks right: " + tipMsg,
+    assertTrue("Safemode tip message doesn't look right: " + tipMsg,
                tipMsg.contains("The number of live datanodes 0 needs an additional " +
-                               "1 live datanodes to reach the minimum number 1. " +
-                               "Safe mode will be turned off automatically."));
+                               "1 live datanodes to reach the minimum number 1.\n" +
+                               "Safe mode will be turned off automatically"));
 
     // Start a datanode
     cluster.startDataNodes(conf, 1, true, null, null);

Modified: hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java?rev=1498672&r1=1498671&r2=1498672&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java (original)
+++ hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java Mon Jul  1 20:32:36 2013
@@ -206,11 +206,11 @@ public class TestHASafeMode {
     // We expect it not to be stuck in safemode, since those blocks
     // that are already visible to the SBN should be processed
     // in the initial block reports.
-    assertSafeMode(nn1, 3, 3);
+    assertSafeMode(nn1, 3, 3, 3, 0);
 
     banner("Waiting for standby to catch up to active namespace");
     HATestUtil.waitForStandbyToCatchUp(nn0, nn1);
-    assertSafeMode(nn1, 8, 8);
+    assertSafeMode(nn1, 8, 8, 3, 0);
   }
   
   /**
@@ -230,7 +230,7 @@ public class TestHASafeMode {
     banner("Restarting standby");
     restartStandby();
     
-    assertSafeMode(nn1, 3, 3);
+    assertSafeMode(nn1, 3, 3, 3, 0);
     
     // Create a few blocks which will send blockReceived calls to the
     // SBN.
@@ -241,7 +241,7 @@ public class TestHASafeMode {
     banner("Waiting for standby to catch up to active namespace");
     HATestUtil.waitForStandbyToCatchUp(nn0, nn1);
 
-    assertSafeMode(nn1, 8, 8);
+    assertSafeMode(nn1, 8, 8, 3, 0);
   }
 
   /**
@@ -281,11 +281,11 @@ public class TestHASafeMode {
 
     banner("Restarting standby");
     restartStandby();
-    assertSafeMode(nn1, 0, 5);
+    assertSafeMode(nn1, 0, 5, 3, 0);
     
     banner("Waiting for standby to catch up to active namespace");
     HATestUtil.waitForStandbyToCatchUp(nn0, nn1);
-    assertSafeMode(nn1, 0, 0);
+    assertSafeMode(nn1, 0, 0, 3, 0);
   }
   
   /**
@@ -307,7 +307,7 @@ public class TestHASafeMode {
     restartStandby();
     
     // It will initially have all of the blocks necessary.
-    assertSafeMode(nn1, 10, 10);
+    assertSafeMode(nn1, 10, 10, 3, 0);
 
     // Delete those blocks while the SBN is in safe mode.
     // This doesn't affect the SBN, since deletions are not
@@ -322,14 +322,14 @@ public class TestHASafeMode {
     HATestUtil.waitForDNDeletions(cluster);
     cluster.triggerDeletionReports();
 
-    assertSafeMode(nn1, 10, 10);
+    assertSafeMode(nn1, 10, 10, 3, 0);
 
     // When we catch up to active namespace, it will restore back
     // to 0 blocks.
     banner("Waiting for standby to catch up to active namespace");
     HATestUtil.waitForStandbyToCatchUp(nn0, nn1);
 
-    assertSafeMode(nn1, 0, 0);
+    assertSafeMode(nn1, 0, 0, 3, 0);
   }
   
   /**
@@ -355,20 +355,20 @@ public class TestHASafeMode {
     restartStandby();
     
     // It will initially have all of the blocks necessary.
-    assertSafeMode(nn1, 5, 5);
+    assertSafeMode(nn1, 5, 5, 3, 0);
 
     // Append to a block while SBN is in safe mode. This should
     // not affect safemode initially, since the DN message
     // will get queued.
     FSDataOutputStream stm = fs.append(new Path("/test"));
     try {
-      assertSafeMode(nn1, 5, 5);
+      assertSafeMode(nn1, 5, 5, 3, 0);
       
       // if we roll edits now, the SBN should see that it's under construction
       // and change its total count and safe count down by one, since UC
       // blocks are not counted by safe mode.
       HATestUtil.waitForStandbyToCatchUp(nn0, nn1);
-      assertSafeMode(nn1, 4, 4);
+      assertSafeMode(nn1, 4, 4, 3, 0);
     } finally {
       IOUtils.closeStream(stm);
     }
@@ -386,13 +386,13 @@ public class TestHASafeMode {
     HATestUtil.waitForDNDeletions(cluster);
     cluster.triggerDeletionReports();
 
-    assertSafeMode(nn1, 4, 4);
+    assertSafeMode(nn1, 4, 4, 3, 0);
 
     // When we roll the edit log, the deletions will go through.
     banner("Waiting for standby to catch up to active namespace");
     HATestUtil.waitForStandbyToCatchUp(nn0, nn1);
 
-    assertSafeMode(nn1, 0, 0);
+    assertSafeMode(nn1, 0, 0, 3, 0);
   }
   
   /**
@@ -424,20 +424,21 @@ public class TestHASafeMode {
     restartActive();
   }
   
-  private void assertSafeMode(NameNode nn, int safe, int total) {
-    String status = nn1.getNamesystem().getSafemode();
+  private static void assertSafeMode(NameNode nn, int safe, int total,
+    int numNodes, int nodeThresh) {
+    String status = nn.getNamesystem().getSafemode();
     if (safe == total) {
       assertTrue("Bad safemode status: '" + status + "'",
           status.startsWith(
-            "Safe mode is ON." +
-            "The reported blocks " + safe + " has reached the threshold " +
-            "0.9990 of total blocks " + total + ". Safe mode will be " +
-            "turned off automatically"));
+            "Safe mode is ON. The reported blocks " + safe + " has reached the "
+            + "threshold 0.9990 of total blocks " + total + ". The number of "
+            + "live datanodes " + numNodes + " has reached the minimum number "
+            + nodeThresh + ". Safe mode will be turned off automatically"));
     } else {
       int additional = total - safe;
       assertTrue("Bad safemode status: '" + status + "'",
           status.startsWith(
-              "Safe mode is ON." +
+              "Safe mode is ON. " +
               "The reported blocks " + safe + " needs additional " +
               additional + " blocks"));
     }
@@ -467,14 +468,14 @@ public class TestHASafeMode {
 
     // We expect it to be on its way out of safemode, since all of the blocks
     // from the edit log have been reported.
-    assertSafeMode(nn1, 3, 3);
+    assertSafeMode(nn1, 3, 3, 3, 0);
     
     // Initiate a failover into it while it's in safemode
     banner("Initiating a failover into NN1 in safemode");
     NameNodeAdapter.abortEditLogs(nn0);
     cluster.transitionToActive(1);
 
-    assertSafeMode(nn1, 5, 5);
+    assertSafeMode(nn1, 5, 5, 3, 0);
   }
   
   /**
@@ -499,10 +500,11 @@ public class TestHASafeMode {
     // It will initially have all of the blocks necessary.
     String status = nn1.getNamesystem().getSafemode();
     assertTrue("Bad safemode status: '" + status + "'",
-        status.startsWith(
-            "Safe mode is ON." +
-            "The reported blocks 10 has reached the threshold 0.9990 of " +
-            "total blocks 10. Safe mode will be turned off automatically"));
+      status.startsWith(
+        "Safe mode is ON. The reported blocks 10 has reached the threshold "
+        + "0.9990 of total blocks 10. The number of live datanodes 3 has "
+        + "reached the minimum number 0. Safe mode will be turned off "
+        + "automatically"));
 
     // Delete those blocks while the SBN is in safe mode.
     // Immediately roll the edit log before the actual deletions are sent
@@ -512,7 +514,7 @@ public class TestHASafeMode {
     HATestUtil.waitForStandbyToCatchUp(nn0, nn1);
 
     // Should see removal of the blocks as well as their contribution to safe block count.
-    assertSafeMode(nn1, 0, 0);
+    assertSafeMode(nn1, 0, 0, 3, 0);
 
     
     banner("Triggering sending deletions to DNs and Deletion Reports");
@@ -525,7 +527,7 @@ public class TestHASafeMode {
     // No change in assertion status here, but some of the consistency checks
     // in safemode will fire here if we accidentally decrement safe block count
     // below 0.    
-    assertSafeMode(nn1, 0, 0);
+    assertSafeMode(nn1, 0, 0, 3, 0);
   }
   
 
@@ -561,11 +563,11 @@ public class TestHASafeMode {
     
     banner("Restarting SBN");
     restartStandby();
-    assertSafeMode(nn1, 10, 10);
+    assertSafeMode(nn1, 10, 10, 3, 0);
 
     banner("Allowing SBN to catch up");
     HATestUtil.waitForStandbyToCatchUp(nn0, nn1);
-    assertSafeMode(nn1, 15, 15);
+    assertSafeMode(nn1, 15, 15, 3, 0);
   }
   
   /**
@@ -593,7 +595,7 @@ public class TestHASafeMode {
     nn0.getRpcServer().rollEditLog();
     
     restartStandby();
-    assertSafeMode(nn1, 6, 6);
+    assertSafeMode(nn1, 6, 6, 3, 0);
   }
   
   /**