You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by ap...@apache.org on 2011/03/08 20:40:27 UTC
svn commit: r1079502 - in /hbase/branches/0.90: CHANGES.txt
src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java
src/main/java/org/apache/hadoop/hbase/util/FSUtils.java
Author: apurtell
Date: Tue Mar 8 19:40:26 2011
New Revision: 1079502
URL: http://svn.apache.org/viewvc?rev=1079502&view=rev
Log:
HBASE-1960 Master should wait for DFS to come up when creating hbase.version; use alternate strategy for waiting for DNs
Modified:
hbase/branches/0.90/CHANGES.txt
hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java
hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/util/FSUtils.java
Modified: hbase/branches/0.90/CHANGES.txt
URL: http://svn.apache.org/viewvc/hbase/branches/0.90/CHANGES.txt?rev=1079502&r1=1079501&r2=1079502&view=diff
==============================================================================
--- hbase/branches/0.90/CHANGES.txt (original)
+++ hbase/branches/0.90/CHANGES.txt Tue Mar 8 19:40:26 2011
@@ -19,6 +19,8 @@ Release 0.90.2 - February 9th, 2011
HBASE-3582 Allow HMaster and HRegionServer to login from keytab
when on secure Hadoop
HBASE-3608 MemstoreFlusher error message doesnt include exception!
+ HBASE-1960 Master should wait for DFS to come up when creating
+ hbase.version; use alternate strategy for waiting for DNs
IMPROVEMENTS
HBASE-3542 MultiGet methods in Thrift
Modified: hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java?rev=1079502&r1=1079501&r2=1079502&view=diff
==============================================================================
--- hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java (original)
+++ hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java Tue Mar 8 19:40:26 2011
@@ -231,7 +231,15 @@ public class MasterFileSystem {
// Filesystem is good. Go ahead and check for hbase.rootdir.
if (!fs.exists(rd)) {
fs.mkdirs(rd);
- FSUtils.setVersion(fs, rd);
+ // DFS leaves safe mode with 0 DNs when there are 0 blocks.
+ // We used to handle this by checking the current DN count and waiting until
+ // it is nonzero. With security, the check for datanode count doesn't work --
+ // it is a privileged op. So instead we adopt the strategy of the jobtracker
+ // and simply retry file creation during bootstrap indefinitely. As soon as
+ // there is one datanode it will succeed. Permission problems should have
+ // already been caught by mkdirs above.
+ FSUtils.setVersion(fs, rd, c.getInt(HConstants.THREAD_WAKE_FREQUENCY,
+ 10 * 1000));
} else {
FSUtils.checkVersion(fs, rd, true);
}
Modified: hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/util/FSUtils.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/util/FSUtils.java?rev=1079502&r1=1079501&r2=1079502&view=diff
==============================================================================
--- hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/util/FSUtils.java (original)
+++ hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/util/FSUtils.java Tue Mar 8 19:40:26 2011
@@ -38,6 +38,7 @@ import org.apache.hadoop.hdfs.protocol.A
import org.apache.hadoop.hdfs.protocol.FSConstants;
import org.apache.hadoop.hdfs.server.namenode.LeaseExpiredException;
import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.util.StringUtils;
import java.io.DataInputStream;
import java.io.EOFException;
@@ -207,7 +208,20 @@ public class FSUtils {
*/
public static void setVersion(FileSystem fs, Path rootdir)
throws IOException {
- setVersion(fs, rootdir, HConstants.FILE_SYSTEM_VERSION);
+ setVersion(fs, rootdir, HConstants.FILE_SYSTEM_VERSION, 0);
+ }
+
+ /**
+ * Sets version of file system
+ *
+ * @param fs filesystem object
+ * @param rootdir hbase root
+ * @param wait time in milliseconds to sleep between retries; if not positive, no retry is attempted
+ * @throws IOException e
+ */
+ public static void setVersion(FileSystem fs, Path rootdir, int wait)
+ throws IOException {
+ setVersion(fs, rootdir, HConstants.FILE_SYSTEM_VERSION, wait);
}
/**
@@ -216,15 +230,33 @@ public class FSUtils {
* @param fs filesystem object
* @param rootdir hbase root directory
* @param version version to set
+ * @param wait time in milliseconds to sleep between retries; if not positive, the IOException is rethrown
* @throws IOException e
*/
- public static void setVersion(FileSystem fs, Path rootdir, String version)
- throws IOException {
- FSDataOutputStream s =
- fs.create(new Path(rootdir, HConstants.VERSION_FILE_NAME));
- s.writeUTF(version);
- s.close();
- LOG.debug("Created version file at " + rootdir.toString() + " set its version at:" + version);
+ public static void setVersion(FileSystem fs, Path rootdir, String version,
+ int wait) throws IOException {
+ while (true) try {
+ FSDataOutputStream s =
+ fs.create(new Path(rootdir, HConstants.VERSION_FILE_NAME));
+ s.writeUTF(version);
+ s.close();
+ LOG.debug("Created version file at " + rootdir.toString() +
+ " set its version at:" + version);
+ return;
+ } catch (IOException e) {
+ if (wait > 0) {
+ LOG.warn("Unable to create version file at " + rootdir.toString() +
+ ", retrying: " + StringUtils.stringifyException(e));
+ try {
+ Thread.sleep(wait);
+ } catch (InterruptedException ex) {
+ // ignore
+ }
+ } else {
+ // rethrow
+ throw e;
+ }
+ }
}
/**
@@ -262,22 +294,6 @@ public class FSUtils {
FileSystem fs = FileSystem.get(conf);
if (!(fs instanceof DistributedFileSystem)) return;
DistributedFileSystem dfs = (DistributedFileSystem)fs;
- // Are there any data nodes up yet?
- // Currently the safe mode check falls through if the namenode is up but no
- // datanodes have reported in yet.
- try {
- while (dfs.getDataNodeStats().length == 0) {
- LOG.info("Waiting for dfs to come up...");
- try {
- Thread.sleep(wait);
- } catch (InterruptedException e) {
- //continue
- }
- }
- } catch (IOException e) {
- // getDataNodeStats can fail if superuser privilege is required to run
- // the datanode report, just ignore it
- }
// Make sure dfs is not in safe mode
while (dfs.setSafeMode(FSConstants.SafeModeAction.SAFEMODE_GET)) {
LOG.info("Waiting for dfs to exit safe mode...");