You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by ji...@apache.org on 2007/09/12 01:39:17 UTC

svn commit: r574731 - in /lucene/hadoop/trunk/src/contrib/hbase: CHANGES.txt src/java/org/apache/hadoop/hbase/HMaster.java src/java/org/apache/hadoop/hbase/HRegionServer.java src/java/org/apache/hadoop/hbase/util/FSUtils.java

Author: jimk
Date: Tue Sep 11 16:39:17 2007
New Revision: 574731

URL: http://svn.apache.org/viewvc?rev=574731&view=rev
Log:
HADOOP-1870 Once file system failure has been detected, don't check it again and get on with shutting down the hbase cluster.

Modified:
    lucene/hadoop/trunk/src/contrib/hbase/CHANGES.txt
    lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HMaster.java
    lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HRegionServer.java
    lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/util/FSUtils.java

Modified: lucene/hadoop/trunk/src/contrib/hbase/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/CHANGES.txt?rev=574731&r1=574730&r2=574731&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/contrib/hbase/CHANGES.txt (original)
+++ lucene/hadoop/trunk/src/contrib/hbase/CHANGES.txt Tue Sep 11 16:39:17 2007
@@ -39,6 +39,8 @@
     HADOOP-1834 Scanners ignore timestamp passed on creation
     HADOOP-1847 Many HBase tests do not fail well.
     HADOOP-1847 Many HBase tests do not fail well. (phase 2)
+    HADOOP-1870 Once file system failure has been detected, don't check it again
+                and get on with shutting down the hbase cluster.
 
   IMPROVEMENTS
     HADOOP-1737 Make HColumnDescriptor data publically members settable

Modified: lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HMaster.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HMaster.java?rev=574731&r1=574730&r2=574731&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HMaster.java (original)
+++ lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HMaster.java Tue Sep 11 16:39:17 2007
@@ -85,6 +85,7 @@
   static final Log LOG = LogFactory.getLog(HMaster.class.getName());
 
   volatile boolean closed;
+  volatile boolean fsOk;
   Path dir;
   Configuration conf;
   FileSystem fs;
@@ -511,6 +512,12 @@
             LOG.warn("Scan ROOT region", e);
           } else {
             LOG.error("Scan ROOT region", e);
+            
+            if (tries == numRetries - 1) {
+              // We ran out of tries. Make sure the file system is still available
+
+              checkFileSystem();
+            }
           }
         } catch (Exception e) {
           // If for some reason we get some other kind of exception, 
@@ -518,13 +525,6 @@
           LOG.error("Unexpected exception", e);
         }
         
-        // We ran out of tries. Make sure the file system is still available
-        
-        if (!FSUtils.isFileSystemAvailable(fs)) {
-          LOG.fatal("Shutting down hbase cluster: file system not available");
-          closed = true;
-        }
-        
         if (!closed) {
           // sleep before retry
 
@@ -681,20 +681,18 @@
             LOG.warn("Scan one META region", e);
           } else {
             LOG.error("Scan one META region", e);
+            
+            if (tries == numRetries - 1) {
+              // We ran out of tries. Make sure the file system is still available
+
+              checkFileSystem();
+            }
           }
         } catch (Exception e) {
           // If for some reason we get some other kind of exception, 
           // at least log it rather than go out silently.
           LOG.error("Unexpected exception", e);
         }
-        
-        // We ran out of tries. Make sure the file system is still available
-        
-        if (!FSUtils.isFileSystemAvailable(fs)) {
-          LOG.fatal("Shutting down hbase cluster: file system not available");
-          closed = true;
-        }
-        
         if (!closed) {
           // sleep before retry
           try {
@@ -852,6 +850,7 @@
     throws IOException {
     
     this.closed = true;
+    this.fsOk = true;
     this.dir = dir;
     this.conf = conf;
     this.fs = FileSystem.get(conf);
@@ -979,6 +978,23 @@
     LOG.info("HMaster initialized on " + this.address.toString());
   }
 
+  /**
+   * Checks to see if the file system is still accessible.
+   * If not, sets closed to true and fsOk to false so the check is not repeated.
+   * 
+   * @return false if file system is not available
+   */
+  protected boolean checkFileSystem() {
+    if (fsOk) {
+      if (!FSUtils.isFileSystemAvailable(fs)) {
+        LOG.fatal("Shutting down HBase cluster: file system not available");
+        closed = true;
+        fsOk = false;
+      }
+    }
+    return fsOk;
+  }
+
   /** @return HServerAddress of the master server */
   public HServerAddress getMasterAddress() {
     return address;
@@ -1071,9 +1087,7 @@
             LOG.warn("main processing loop: " + op.toString(), e);
           }
         }
-        if (!FSUtils.isFileSystemAvailable(fs)) {
-          LOG.fatal("Shutting down hbase cluster: file system not available");
-          closed = true;
+        if (!checkFileSystem()) {
           break;
         }
         LOG.warn("Processing pending operations: " + op.toString(), ex);
@@ -2664,10 +2678,7 @@
           if (tries == numRetries - 1) {
             // No retries left
             
-            if (!FSUtils.isFileSystemAvailable(fs)) {
-              LOG.fatal("Shutting down hbase cluster: file system not available");
-              closed = true;
-            }
+            checkFileSystem();
 
             if (e instanceof RemoteException) {
               e = RemoteExceptionHandler.decodeRemoteException(

Modified: lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HRegionServer.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HRegionServer.java?rev=574731&r1=574730&r2=574731&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HRegionServer.java (original)
+++ lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HRegionServer.java Tue Sep 11 16:39:17 2007
@@ -84,6 +84,9 @@
   // debugging and unit tests.
   protected volatile boolean abortRequested;
   
+  // If false, the file system has become unavailable
+  protected volatile boolean fsOk;
+  
   final Path rootDir;
   protected final HServerInfo serverInfo;
   protected final Configuration conf;
@@ -435,6 +438,7 @@
     // Basic setup
     this.stopRequested = false;
     this.abortRequested = false;
+    this.fsOk = true;
     this.rootDir = rootDir;
     this.conf = conf;
     this.rand = new Random();
@@ -512,6 +516,11 @@
     }
   }
 
+  /** @return the HLog */
+  HLog getLog() {
+    return log;
+  }
+
   /**
    * Sets a flag that will cause all the HRegionServer threads to shut down
    * in an orderly fashion.
@@ -1101,6 +1110,7 @@
     }
   }
 
+  /** {@inheritDoc} */
   public void batchUpdate(Text regionName, long timestamp, BatchUpdate b)
   throws IOException {  
     requestCount.incrementAndGet();
@@ -1259,6 +1269,7 @@
     region.delete(lockid, column);
   }
   
+  /** {@inheritDoc} */
   public void deleteAll(final Text regionName, final Text row,
       final Text column, final long timestamp) 
   throws IOException {
@@ -1326,12 +1337,13 @@
    * @return false if file system is not available
    */
   protected boolean checkFileSystem() {
-    boolean fsOk = true;
-    if (!FSUtils.isFileSystemAvailable(fs)) {
-      LOG.fatal("Shutting down HRegionServer: file system not available");
-      abortRequested = true;
-      stopRequested = true;
-      fsOk = false;
+    if (fsOk) {
+      if (!FSUtils.isFileSystemAvailable(fs)) {
+        LOG.fatal("Shutting down HRegionServer: file system not available");
+        abortRequested = true;
+        stopRequested = true;
+        fsOk = false;
+      }
     }
     return fsOk;
   }

Modified: lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/util/FSUtils.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/util/FSUtils.java?rev=574731&r1=574730&r2=574731&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/util/FSUtils.java (original)
+++ lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/util/FSUtils.java Tue Sep 11 16:39:17 2007
@@ -54,6 +54,15 @@
       } catch (IOException e) {
         LOG.fatal("file system unavailable because: ", e);
       }
+
+      try {
+        if (!available) {
+          fs.close();
+        }
+        
+      } catch (IOException e) {
+        LOG.error("file system close", e);
+      }
       
     } else {
       available = true;