You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by ap...@apache.org on 2011/03/08 20:40:27 UTC

svn commit: r1079502 - in /hbase/branches/0.90: CHANGES.txt src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java src/main/java/org/apache/hadoop/hbase/util/FSUtils.java

Author: apurtell
Date: Tue Mar  8 19:40:26 2011
New Revision: 1079502

URL: http://svn.apache.org/viewvc?rev=1079502&view=rev
Log:
HBASE-1960  Master should wait for DFS to come up when creating hbase.version; use alternate strategy for waiting for DNs

Modified:
    hbase/branches/0.90/CHANGES.txt
    hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java
    hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/util/FSUtils.java

Modified: hbase/branches/0.90/CHANGES.txt
URL: http://svn.apache.org/viewvc/hbase/branches/0.90/CHANGES.txt?rev=1079502&r1=1079501&r2=1079502&view=diff
==============================================================================
--- hbase/branches/0.90/CHANGES.txt (original)
+++ hbase/branches/0.90/CHANGES.txt Tue Mar  8 19:40:26 2011
@@ -19,6 +19,8 @@ Release 0.90.2 - February 9th, 2011
    HBASE-3582  Allow HMaster and HRegionServer to login from keytab
                when on secure Hadoop
    HBASE-3608  MemstoreFlusher error message doesnt include exception!
+   HBASE-1960  Master should wait for DFS to come up when creating
+               hbase.version; use alternate strategy for waiting for DNs
   
   IMPROVEMENTS
    HBASE-3542  MultiGet methods in Thrift

Modified: hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java?rev=1079502&r1=1079501&r2=1079502&view=diff
==============================================================================
--- hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java (original)
+++ hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java Tue Mar  8 19:40:26 2011
@@ -231,7 +231,15 @@ public class MasterFileSystem {
     // Filesystem is good. Go ahead and check for hbase.rootdir.
     if (!fs.exists(rd)) {
       fs.mkdirs(rd);
-      FSUtils.setVersion(fs, rd);
+      // DFS leaves safe mode with 0 DNs when there are 0 blocks.
+      // We used to handle this by checking the current DN count and waiting until
+      // it is nonzero. With security, the check for datanode count doesn't work --
+      // it is a privileged op. So instead we adopt the strategy of the jobtracker
+      // and simply retry file creation during bootstrap indefinitely. As soon as
+      // there is one datanode it will succeed. Permission problems should have
+      // already been caught by mkdirs above.
+      FSUtils.setVersion(fs, rd, c.getInt(HConstants.THREAD_WAKE_FREQUENCY,
+        10 * 1000));
     } else {
       FSUtils.checkVersion(fs, rd, true);
     }

Modified: hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/util/FSUtils.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/util/FSUtils.java?rev=1079502&r1=1079501&r2=1079502&view=diff
==============================================================================
--- hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/util/FSUtils.java (original)
+++ hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/util/FSUtils.java Tue Mar  8 19:40:26 2011
@@ -38,6 +38,7 @@ import org.apache.hadoop.hdfs.protocol.A
 import org.apache.hadoop.hdfs.protocol.FSConstants;
 import org.apache.hadoop.hdfs.server.namenode.LeaseExpiredException;
 import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.util.StringUtils;
 
 import java.io.DataInputStream;
 import java.io.EOFException;
@@ -207,7 +208,20 @@ public class FSUtils {
    */
   public static void setVersion(FileSystem fs, Path rootdir)
   throws IOException {
-    setVersion(fs, rootdir, HConstants.FILE_SYSTEM_VERSION);
+    setVersion(fs, rootdir, HConstants.FILE_SYSTEM_VERSION, 0);
+  }
+
+  /**
+   * Sets version of file system
+   *
+   * @param fs filesystem object
+   * @param rootdir hbase root
+   * @param wait milliseconds to sleep between retries; if not positive, a failure is rethrown at once
+   * @throws IOException if the version file cannot be written and wait is not positive
+   */
+  public static void setVersion(FileSystem fs, Path rootdir, int wait)
+  throws IOException {
+    setVersion(fs, rootdir, HConstants.FILE_SYSTEM_VERSION, wait);
   }
 
   /**
@@ -216,15 +230,33 @@ public class FSUtils {
    * @param fs filesystem object
    * @param rootdir hbase root directory
    * @param version version to set
+   * @param wait milliseconds to sleep between retries; if not positive, a failure is rethrown at once
    * @throws IOException e
    */
-  public static void setVersion(FileSystem fs, Path rootdir, String version)
-  throws IOException {
-    FSDataOutputStream s =
-      fs.create(new Path(rootdir, HConstants.VERSION_FILE_NAME));
-    s.writeUTF(version);
-    s.close();
-    LOG.debug("Created version file at " + rootdir.toString() + " set its version at:" + version);
+  public static void setVersion(FileSystem fs, Path rootdir, String version,
+      int wait) throws IOException {
+    while (true) try {
+      FSDataOutputStream s =
+        fs.create(new Path(rootdir, HConstants.VERSION_FILE_NAME));
+      s.writeUTF(version);
+      s.close();
+      LOG.debug("Created version file at " + rootdir.toString() +
+        " set its version at:" + version);
+      return;
+    } catch (IOException e) {
+      if (wait > 0) {
+        LOG.warn("Unable to create version file at " + rootdir.toString() +
+          ", retrying: " + StringUtils.stringifyException(e));
+        try {
+          Thread.sleep(wait);
+        } catch (InterruptedException ex) {
+          // interrupted while sleeping: ignore it and retry the create right away
+        }
+      } else {
+        // rethrow
+        throw e;
+      }
+    }
   }
 
   /**
@@ -262,22 +294,6 @@ public class FSUtils {
     FileSystem fs = FileSystem.get(conf);
     if (!(fs instanceof DistributedFileSystem)) return;
     DistributedFileSystem dfs = (DistributedFileSystem)fs;
-    // Are there any data nodes up yet?
-    // Currently the safe mode check falls through if the namenode is up but no
-    // datanodes have reported in yet.
-    try {
-      while (dfs.getDataNodeStats().length == 0) {
-        LOG.info("Waiting for dfs to come up...");
-        try {
-          Thread.sleep(wait);
-        } catch (InterruptedException e) {
-          //continue
-        }
-      }
-    } catch (IOException e) {
-      // getDataNodeStats can fail if superuser privilege is required to run
-      // the datanode report, just ignore it
-    }
     // Make sure dfs is not in safe mode
     while (dfs.setSafeMode(FSConstants.SafeModeAction.SAFEMODE_GET)) {
       LOG.info("Waiting for dfs to exit safe mode...");