You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by ji...@apache.org on 2007/09/12 01:39:17 UTC
svn commit: r574731 - in /lucene/hadoop/trunk/src/contrib/hbase: CHANGES.txt
src/java/org/apache/hadoop/hbase/HMaster.java
src/java/org/apache/hadoop/hbase/HRegionServer.java
src/java/org/apache/hadoop/hbase/util/FSUtils.java
Author: jimk
Date: Tue Sep 11 16:39:17 2007
New Revision: 574731
URL: http://svn.apache.org/viewvc?rev=574731&view=rev
Log:
HADOOP-1870 Once file system failure has been detected, don't check it again and get on with shutting down the hbase cluster.
Modified:
lucene/hadoop/trunk/src/contrib/hbase/CHANGES.txt
lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HMaster.java
lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HRegionServer.java
lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/util/FSUtils.java
Modified: lucene/hadoop/trunk/src/contrib/hbase/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/CHANGES.txt?rev=574731&r1=574730&r2=574731&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/contrib/hbase/CHANGES.txt (original)
+++ lucene/hadoop/trunk/src/contrib/hbase/CHANGES.txt Tue Sep 11 16:39:17 2007
@@ -39,6 +39,8 @@
HADOOP-1834 Scanners ignore timestamp passed on creation
HADOOP-1847 Many HBase tests do not fail well.
HADOOP-1847 Many HBase tests do not fail well. (phase 2)
+ HADOOP-1870 Once file system failure has been detected, don't check it again
+ and get on with shutting down the hbase cluster.
IMPROVEMENTS
HADOOP-1737 Make HColumnDescriptor data publically members settable
Modified: lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HMaster.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HMaster.java?rev=574731&r1=574730&r2=574731&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HMaster.java (original)
+++ lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HMaster.java Tue Sep 11 16:39:17 2007
@@ -85,6 +85,7 @@
static final Log LOG = LogFactory.getLog(HMaster.class.getName());
volatile boolean closed;
+ volatile boolean fsOk;
Path dir;
Configuration conf;
FileSystem fs;
@@ -511,6 +512,12 @@
LOG.warn("Scan ROOT region", e);
} else {
LOG.error("Scan ROOT region", e);
+
+ if (tries == numRetries - 1) {
+ // We ran out of tries. Make sure the file system is still available
+
+ checkFileSystem();
+ }
}
} catch (Exception e) {
// If for some reason we get some other kind of exception,
@@ -518,13 +525,6 @@
LOG.error("Unexpected exception", e);
}
- // We ran out of tries. Make sure the file system is still available
-
- if (!FSUtils.isFileSystemAvailable(fs)) {
- LOG.fatal("Shutting down hbase cluster: file system not available");
- closed = true;
- }
-
if (!closed) {
// sleep before retry
@@ -681,20 +681,18 @@
LOG.warn("Scan one META region", e);
} else {
LOG.error("Scan one META region", e);
+
+ if (tries == numRetries - 1) {
+ // We ran out of tries. Make sure the file system is still available
+
+ checkFileSystem();
+ }
}
} catch (Exception e) {
// If for some reason we get some other kind of exception,
// at least log it rather than go out silently.
LOG.error("Unexpected exception", e);
}
-
- // We ran out of tries. Make sure the file system is still available
-
- if (!FSUtils.isFileSystemAvailable(fs)) {
- LOG.fatal("Shutting down hbase cluster: file system not available");
- closed = true;
- }
-
if (!closed) {
// sleep before retry
try {
@@ -852,6 +850,7 @@
throws IOException {
this.closed = true;
+ this.fsOk = true;
this.dir = dir;
this.conf = conf;
this.fs = FileSystem.get(conf);
@@ -979,6 +978,23 @@
LOG.info("HMaster initialized on " + this.address.toString());
}
+ /**
+ * Checks to see if the file system is still accessible.
+ * If not, sets closed to true and fsOk to false
+ *
+ * @return false if file system is not available
+ */
+ protected boolean checkFileSystem() {
+ if (fsOk) {
+ if (!FSUtils.isFileSystemAvailable(fs)) {
+ LOG.fatal("Shutting down HBase cluster: file system not available");
+ closed = true;
+ fsOk = false;
+ }
+ }
+ return fsOk;
+ }
+
/** @return HServerAddress of the master server */
public HServerAddress getMasterAddress() {
return address;
@@ -1071,9 +1087,7 @@
LOG.warn("main processing loop: " + op.toString(), e);
}
}
- if (!FSUtils.isFileSystemAvailable(fs)) {
- LOG.fatal("Shutting down hbase cluster: file system not available");
- closed = true;
+ if (!checkFileSystem()) {
break;
}
LOG.warn("Processing pending operations: " + op.toString(), ex);
@@ -2664,10 +2678,7 @@
if (tries == numRetries - 1) {
// No retries left
- if (!FSUtils.isFileSystemAvailable(fs)) {
- LOG.fatal("Shutting down hbase cluster: file system not available");
- closed = true;
- }
+ checkFileSystem();
if (e instanceof RemoteException) {
e = RemoteExceptionHandler.decodeRemoteException(
Modified: lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HRegionServer.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HRegionServer.java?rev=574731&r1=574730&r2=574731&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HRegionServer.java (original)
+++ lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HRegionServer.java Tue Sep 11 16:39:17 2007
@@ -84,6 +84,9 @@
// debugging and unit tests.
protected volatile boolean abortRequested;
+ // If false, the file system has become unavailable
+ protected volatile boolean fsOk;
+
final Path rootDir;
protected final HServerInfo serverInfo;
protected final Configuration conf;
@@ -435,6 +438,7 @@
// Basic setup
this.stopRequested = false;
this.abortRequested = false;
+ this.fsOk = true;
this.rootDir = rootDir;
this.conf = conf;
this.rand = new Random();
@@ -512,6 +516,11 @@
}
}
+ /** @return the HLog */
+ HLog getLog() {
+ return log;
+ }
+
/**
* Sets a flag that will cause all the HRegionServer threads to shut down
* in an orderly fashion.
@@ -1101,6 +1110,7 @@
}
}
+ /** {@inheritDoc} */
public void batchUpdate(Text regionName, long timestamp, BatchUpdate b)
throws IOException {
requestCount.incrementAndGet();
@@ -1259,6 +1269,7 @@
region.delete(lockid, column);
}
+ /** {@inheritDoc} */
public void deleteAll(final Text regionName, final Text row,
final Text column, final long timestamp)
throws IOException {
@@ -1326,12 +1337,13 @@
* @return false if file system is not available
*/
protected boolean checkFileSystem() {
- boolean fsOk = true;
- if (!FSUtils.isFileSystemAvailable(fs)) {
- LOG.fatal("Shutting down HRegionServer: file system not available");
- abortRequested = true;
- stopRequested = true;
- fsOk = false;
+ if (fsOk) {
+ if (!FSUtils.isFileSystemAvailable(fs)) {
+ LOG.fatal("Shutting down HRegionServer: file system not available");
+ abortRequested = true;
+ stopRequested = true;
+ fsOk = false;
+ }
}
return fsOk;
}
Modified: lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/util/FSUtils.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/util/FSUtils.java?rev=574731&r1=574730&r2=574731&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/util/FSUtils.java (original)
+++ lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/util/FSUtils.java Tue Sep 11 16:39:17 2007
@@ -54,6 +54,15 @@
} catch (IOException e) {
LOG.fatal("file system unavailable because: ", e);
}
+
+ try {
+ if (!available) {
+ fs.close();
+ }
+
+ } catch (IOException e) {
+ LOG.error("file system close", e);
+ }
} else {
available = true;