You are viewing a plain text version of this content. (The hyperlink to the canonical version was not preserved in this plain text copy.)
Posted to hdfs-commits@hadoop.apache.org by to...@apache.org on 2012/04/11 07:16:07 UTC

svn commit: r1324558 - in /hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs: CHANGES.txt src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/BootstrapStandby.java src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestBootstrapStandby.java

Author: todd
Date: Wed Apr 11 05:16:06 2012
New Revision: 1324558

URL: http://svn.apache.org/viewvc?rev=1324558&view=rev
Log:
HDFS-3247. Improve bootstrapStandby behavior when original NN is not active. Contributed by Todd Lipcon.

Modified:
    hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
    hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/BootstrapStandby.java
    hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestBootstrapStandby.java

Modified: hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt?rev=1324558&r1=1324557&r2=1324558&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt (original)
+++ hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt Wed Apr 11 05:16:06 2012
@@ -362,6 +362,9 @@ Release 2.0.0 - UNRELEASED 
 
     HDFS-3244. Remove dead writable code from hdfs/protocol. (eli)
 
+    HDFS-3247. Improve bootstrapStandby behavior when original NN is not active
+    (todd)
+
   OPTIMIZATIONS
 
     HDFS-3024. Improve performance of stringification in addStoredBlock (todd)

Modified: hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/BootstrapStandby.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/BootstrapStandby.java?rev=1324558&r1=1324557&r2=1324558&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/BootstrapStandby.java (original)
+++ hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/BootstrapStandby.java Wed Apr 11 05:16:06 2012
@@ -33,10 +33,14 @@ import org.apache.hadoop.HadoopIllegalAr
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.conf.Configurable;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.ha.HAServiceProtocol;
+import org.apache.hadoop.ha.HAServiceStatus;
+import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
+import org.apache.hadoop.ha.ServiceFailedException;
 import org.apache.hadoop.hdfs.DFSUtil;
 import org.apache.hadoop.hdfs.HAUtil;
+import org.apache.hadoop.hdfs.HdfsConfiguration;
 import org.apache.hadoop.hdfs.NameNodeProxies;
-import org.apache.hadoop.hdfs.NameNodeProxies.ProxyAndInfo;
 import org.apache.hadoop.hdfs.protocol.HdfsConstants;
 import org.apache.hadoop.hdfs.server.namenode.CheckpointSignature;
 import org.apache.hadoop.hdfs.server.namenode.EditLogInputStream;
@@ -47,8 +51,10 @@ import org.apache.hadoop.hdfs.server.nam
 import org.apache.hadoop.hdfs.server.namenode.TransferFsImage;
 import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol;
 import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
+import org.apache.hadoop.hdfs.tools.NNHAServiceTarget;
 import org.apache.hadoop.io.IOUtils;
 import org.apache.hadoop.io.MD5Hash;
+import org.apache.hadoop.security.AccessControlException;
 import org.apache.hadoop.security.SecurityUtil;
 import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.util.Tool;
@@ -65,7 +71,7 @@ import com.google.common.collect.Sets;
  */
 @InterfaceAudience.Private
 public class BootstrapStandby implements Tool, Configurable {
-  private static final Log LOG = LogFactory.getLog(BootstrapStandby.class); 
+  private static final Log LOG = LogFactory.getLog(BootstrapStandby.class);
   private String nsId;
   private String nnId;
   private String otherNNId;
@@ -79,7 +85,13 @@ public class BootstrapStandby implements
   
   private boolean force = false;
   private boolean interactive = true;
-  
+
+  // Exit/return codes.
+  static final int ERR_CODE_FAILED_CONNECT = 2;
+  static final int ERR_CODE_INVALID_VERSION = 3;
+  static final int ERR_CODE_OTHER_NN_NOT_ACTIVE = 4;
+  static final int ERR_CODE_ALREADY_FORMATTED = 5;
+  static final int ERR_CODE_LOGS_UNAVAILABLE = 6; 
 
   public int run(String[] args) throws Exception {
     SecurityUtil.initKrb5CipherSuites();
@@ -121,24 +133,43 @@ public class BootstrapStandby implements
     System.err.println("Usage: " + this.getClass().getSimpleName() +
         "[-force] [-nonInteractive]");
   }
+  
+  private NamenodeProtocol createNNProtocolProxy()
+      throws IOException {
+    return NameNodeProxies.createNonHAProxy(getConf(),
+        otherIpcAddr, NamenodeProtocol.class,
+        UserGroupInformation.getLoginUser(), true)
+        .getProxy();
+  }
+  
+  private HAServiceProtocol createHAProtocolProxy()
+      throws IOException {
+    return new NNHAServiceTarget(new HdfsConfiguration(conf),
+        nsId, otherNNId).getProxy(conf, 15000);
+  }
 
   private int doRun() throws IOException {
-    ProxyAndInfo<NamenodeProtocol> proxyAndInfo = NameNodeProxies.createNonHAProxy(getConf(),
-      otherIpcAddr, NamenodeProtocol.class,
-      UserGroupInformation.getLoginUser(), true);
-    NamenodeProtocol proxy = proxyAndInfo.getProxy();
+
+    NamenodeProtocol proxy = createNNProtocolProxy();
     NamespaceInfo nsInfo;
     try {
       nsInfo = proxy.versionRequest();
-      checkLayoutVersion(nsInfo);
     } catch (IOException ioe) {
       LOG.fatal("Unable to fetch namespace information from active NN at " +
           otherIpcAddr + ": " + ioe.getMessage());
       if (LOG.isDebugEnabled()) {
         LOG.debug("Full exception trace", ioe);
       }
-      return 1;
+      return ERR_CODE_FAILED_CONNECT;
     }
+
+    if (!checkLayoutVersion(nsInfo)) {
+      LOG.fatal("Layout version on remote node (" +
+          nsInfo.getLayoutVersion() + ") does not match " +
+          "this node's layout version (" + HdfsConstants.LAYOUT_VERSION + ")");
+      return ERR_CODE_INVALID_VERSION;
+    }
+
     
     System.out.println(
         "=====================================================\n" +
@@ -153,12 +184,35 @@ public class BootstrapStandby implements
         "           Layout version: " + nsInfo.getLayoutVersion() + "\n" +
         "=====================================================");
 
+    // Ensure the other NN is active - we can't force it to roll edit logs
+    // below if it's not active.
+    if (!isOtherNNActive()) {
+      String err = "NameNode " + nsId + "." + nnId + " at " + otherIpcAddr +
+          " is not currently in ACTIVE state.";
+      if (!interactive) {
+        LOG.fatal(err + " Please transition it to " +
+            "active before attempting to bootstrap a standby node.");
+        return ERR_CODE_OTHER_NN_NOT_ACTIVE;
+      }
+      
+      System.err.println(err);
+      if (ToolRunner.confirmPrompt(
+            "Do you want to automatically transition it to active now?")) {
+        transitionOtherNNActive();
+      } else {
+        LOG.fatal("User aborted. Exiting without bootstrapping standby.");
+        return ERR_CODE_OTHER_NN_NOT_ACTIVE;
+      }
+    }
+    
+
+    
     // Check with the user before blowing away data.
     if (!NameNode.confirmFormat(
             Sets.union(Sets.newHashSet(dirsToFormat),
                 Sets.newHashSet(editUrisToFormat)),
             force, interactive)) {
-      return 1;
+      return ERR_CODE_ALREADY_FORMATTED;
     }
 
     // Force the active to roll its log
@@ -180,7 +234,7 @@ public class BootstrapStandby implements
     // Ensure that we have enough edits already in the shared directory to
     // start up from the last checkpoint on the active.
     if (!checkLogsAvailableForRead(image, imageTxId, rollTxId)) {
-      return 1;
+      return ERR_CODE_LOGS_UNAVAILABLE;
     }
     
     image.getStorage().writeTransactionIdFileToStorage(rollTxId);
@@ -193,6 +247,14 @@ public class BootstrapStandby implements
     return 0;
   }
 
+  
+  private void transitionOtherNNActive()
+      throws AccessControlException, ServiceFailedException, IOException {
+    LOG.info("Transitioning the running namenode to active...");
+    createHAProtocolProxy().transitionToActive();    
+    LOG.info("Successful");
+  }
+
   private boolean checkLogsAvailableForRead(FSImage image, long imageTxId,
       long rollTxId) {
     
@@ -225,12 +287,14 @@ public class BootstrapStandby implements
     }
   }
 
-  private void checkLayoutVersion(NamespaceInfo nsInfo) throws IOException {
-    if (nsInfo.getLayoutVersion() != HdfsConstants.LAYOUT_VERSION) {
-      throw new IOException("Layout version on remote node (" +
-          nsInfo.getLayoutVersion() + ") does not match " +
-          "this node's layout version (" + HdfsConstants.LAYOUT_VERSION + ")");
-    }
+  private boolean checkLayoutVersion(NamespaceInfo nsInfo) throws IOException {
+    return (nsInfo.getLayoutVersion() == HdfsConstants.LAYOUT_VERSION);
+  }
+  
+  private boolean isOtherNNActive()
+      throws AccessControlException, IOException {
+    HAServiceStatus status = createHAProtocolProxy().getServiceStatus();
+    return status.getState() == HAServiceState.ACTIVE;
   }
 
   private void parseConfAndFindOtherNN() throws IOException {

Modified: hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestBootstrapStandby.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestBootstrapStandby.java?rev=1324558&r1=1324557&r2=1324558&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestBootstrapStandby.java (original)
+++ hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestBootstrapStandby.java Wed Apr 11 05:16:06 2012
@@ -17,6 +17,7 @@
  */
 package org.apache.hadoop.hdfs.server.namenode.ha;
 
+import java.io.ByteArrayInputStream;
 import java.io.File;
 import java.io.IOException;
 import java.net.URI;
@@ -40,6 +41,7 @@ import org.junit.After;
 import org.junit.Before;
 import org.junit.Test;
 
+import com.google.common.base.Suppliers;
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.Lists;
 
@@ -170,7 +172,7 @@ public class TestBootstrapStandby {
       int rc = BootstrapStandby.run(
           new String[]{"-force"},
           cluster.getConfiguration(1));
-      assertEquals(1, rc);
+      assertEquals(BootstrapStandby.ERR_CODE_LOGS_UNAVAILABLE, rc);
     } finally {
       logs.stopCapturing();
     }
@@ -184,7 +186,7 @@ public class TestBootstrapStandby {
     int rc = BootstrapStandby.run(
         new String[]{"-nonInteractive"},
         cluster.getConfiguration(1));
-    assertEquals(1, rc);
+    assertEquals(BootstrapStandby.ERR_CODE_ALREADY_FORMATTED, rc);
 
     // Should pass with -force
     rc = BootstrapStandby.run(
@@ -192,6 +194,24 @@ public class TestBootstrapStandby {
         cluster.getConfiguration(1));
     assertEquals(0, rc);
   }
+  
+  @Test(timeout=30000)
+  public void testOtherNodeNotActive() throws Exception {
+    cluster.transitionToStandby(0);
+    int rc = BootstrapStandby.run(
+        new String[]{"-nonInteractive"},
+        cluster.getConfiguration(1));
+    assertEquals(BootstrapStandby.ERR_CODE_OTHER_NN_NOT_ACTIVE, rc);
+    
+    // Answer "yes" to the prompt about transition to active
+    System.setIn(new ByteArrayInputStream("yes\n".getBytes()));
+    rc = BootstrapStandby.run(
+        new String[]{"-force"},
+        cluster.getConfiguration(1));
+    assertEquals(0, rc);
+    
+    assertFalse(nn0.getNamesystem().isInStandbyState());
+  }
 
   private void assertNNFilesMatch() throws Exception {
     List<File> curDirs = Lists.newArrayList();