You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by st...@apache.org on 2008/10/15 23:44:27 UTC

svn commit: r705064 - in /hadoop/hbase: branches/0.18/ branches/0.18/src/java/org/apache/hadoop/hbase/regionserver/ branches/0.18/src/java/org/apache/hadoop/hbase/util/ trunk/ trunk/src/java/org/apache/hadoop/hbase/regionserver/ trunk/src/java/org/apac...

Author: stack
Date: Wed Oct 15 14:44:26 2008
New Revision: 705064

URL: http://svn.apache.org/viewvc?rev=705064&view=rev
Log:
HBASE-930 RegionServer stuck: HLog: Could not append. Requesting close of log java.io.IOException: Could not get block locations. Aborting...

Added:
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/FailedLogCloseException.java
Modified:
    hadoop/hbase/branches/0.18/CHANGES.txt
    hadoop/hbase/branches/0.18/src/java/org/apache/hadoop/hbase/regionserver/Flusher.java
    hadoop/hbase/branches/0.18/src/java/org/apache/hadoop/hbase/regionserver/HLog.java
    hadoop/hbase/branches/0.18/src/java/org/apache/hadoop/hbase/regionserver/LogRoller.java
    hadoop/hbase/branches/0.18/src/java/org/apache/hadoop/hbase/util/FSUtils.java
    hadoop/hbase/trunk/CHANGES.txt
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/Flusher.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HLog.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/LogRoller.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/util/FSUtils.java

Modified: hadoop/hbase/branches/0.18/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.18/CHANGES.txt?rev=705064&r1=705063&r2=705064&view=diff
==============================================================================
--- hadoop/hbase/branches/0.18/CHANGES.txt (original)
+++ hadoop/hbase/branches/0.18/CHANGES.txt Wed Oct 15 14:44:26 2008
@@ -13,6 +13,8 @@
    HBASE-928   NPE throwing RetriesExhaustedException
    HBASE-576   Investigate IPC performance; partial.
    HBASE-924   Update hadoop in lib on 0.18 hbase branch to 0.18.1
+   HBASE-930   RegionServer stuck: HLog: Could not append. Requesting close of
+               log java.io.IOException: Could not get block locations. Aborting...
 
   IMPROVEMENTS
    HBASE-920   Make region balancing sloppier

Modified: hadoop/hbase/branches/0.18/src/java/org/apache/hadoop/hbase/regionserver/Flusher.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.18/src/java/org/apache/hadoop/hbase/regionserver/Flusher.java?rev=705064&r1=705063&r2=705064&view=diff
==============================================================================
--- hadoop/hbase/branches/0.18/src/java/org/apache/hadoop/hbase/regionserver/Flusher.java (original)
+++ hadoop/hbase/branches/0.18/src/java/org/apache/hadoop/hbase/regionserver/Flusher.java Wed Oct 15 14:44:26 2008
@@ -178,7 +178,7 @@
       // is required. Currently the only way to do this is a restart of
       // the server. Abort because hdfs is probably bad (HBASE-644 is a case
       // where hdfs was bad but passed the hdfs check).
-      LOG.fatal("Replay of hlog required. Forcing server restart", ex);
+      LOG.fatal("Replay of hlog required. Forcing server shutdown", ex);
       server.abort();
       return false;
     } catch (IOException ex) {

Modified: hadoop/hbase/branches/0.18/src/java/org/apache/hadoop/hbase/regionserver/HLog.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.18/src/java/org/apache/hadoop/hbase/regionserver/HLog.java?rev=705064&r1=705063&r2=705064&view=diff
==============================================================================
--- hadoop/hbase/branches/0.18/src/java/org/apache/hadoop/hbase/regionserver/HLog.java (original)
+++ hadoop/hbase/branches/0.18/src/java/org/apache/hadoop/hbase/regionserver/HLog.java Wed Oct 15 14:44:26 2008
@@ -226,9 +226,10 @@
    * cacheFlushLock and then completeCacheFlush could be called which would wait
    * for the lock on this and consequently never release the cacheFlushLock
    *
+   * @throws FailedLogCloseException
    * @throws IOException
    */
-  public void rollWriter() throws IOException {
+  public void rollWriter() throws FailedLogCloseException, IOException {
     this.cacheFlushLock.lock();
     try {
       if (closed) {
@@ -237,7 +238,14 @@
       synchronized (updateLock) {
         if (this.writer != null) {
           // Close the current writer, get a new one.
-          this.writer.close();
+          try {
+            this.writer.close();
+          } catch (IOException e) {
+            // Failed close of log file.  Means we're losing edits.  For now,
+            // shut ourselves down to minimize loss.  Alternative is to try and
+            // keep going.  See HBASE-930.
+            throw new FailedLogCloseException("#" + this.filenum, e);
+          }
           Path p = computeFilename(old_filenum);
           if (LOG.isDebugEnabled()) {
             LOG.debug("Closing current log writer " + FSUtils.getPath(p));

Modified: hadoop/hbase/branches/0.18/src/java/org/apache/hadoop/hbase/regionserver/LogRoller.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.18/src/java/org/apache/hadoop/hbase/regionserver/LogRoller.java?rev=705064&r1=705063&r2=705064&view=diff
==============================================================================
--- hadoop/hbase/branches/0.18/src/java/org/apache/hadoop/hbase/regionserver/LogRoller.java (original)
+++ hadoop/hbase/branches/0.18/src/java/org/apache/hadoop/hbase/regionserver/LogRoller.java Wed Oct 15 14:44:26 2008
@@ -77,8 +77,11 @@
       try {
         LOG.info("Rolling hlog. Number of entries: " + server.getLog().getNumEntries());
         server.getLog().rollWriter();
+      } catch (FailedLogCloseException e) {
+        LOG.fatal("Forcing server shutdown", e);
+        server.abort();
       } catch (IOException ex) {
-        LOG.error("Log rolling failed",
+        LOG.error("Log rolling failed with ioe: ",
             RemoteExceptionHandler.checkIOException(ex));
         server.checkFileSystem();
       } catch (Exception ex) {

Modified: hadoop/hbase/branches/0.18/src/java/org/apache/hadoop/hbase/util/FSUtils.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.18/src/java/org/apache/hadoop/hbase/util/FSUtils.java?rev=705064&r1=705063&r2=705064&view=diff
==============================================================================
--- hadoop/hbase/branches/0.18/src/java/org/apache/hadoop/hbase/util/FSUtils.java (original)
+++ hadoop/hbase/branches/0.18/src/java/org/apache/hadoop/hbase/util/FSUtils.java Wed Oct 15 14:44:26 2008
@@ -72,10 +72,8 @@
     } catch (IOException e) {
       exception = RemoteExceptionHandler.checkIOException(e);
     }
-    
     try {
       fs.close();
-        
     } catch (Exception e) {
         LOG.error("file system close failed: ", e);
     }

Modified: hadoop/hbase/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/CHANGES.txt?rev=705064&r1=705063&r2=705064&view=diff
==============================================================================
--- hadoop/hbase/trunk/CHANGES.txt (original)
+++ hadoop/hbase/trunk/CHANGES.txt Wed Oct 15 14:44:26 2008
@@ -27,6 +27,8 @@
    HBASE-928   NPE throwing RetriesExhaustedException
    HBASE-924   Update hadoop in lib on 0.18 hbase branch to 0.18.1
    HBASE-929   Clarify that ttl in HColumnDescriptor is seconds
+   HBASE-930   RegionServer stuck: HLog: Could not append. Requesting close of
+               log java.io.IOException: Could not get block locations. Aborting...
 
   IMPROVEMENTS
    HBASE-901   Add a limit to key length, check key and value length on client side

Added: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/FailedLogCloseException.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/FailedLogCloseException.java?rev=705064&view=auto
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/FailedLogCloseException.java (added)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/FailedLogCloseException.java Wed Oct 15 14:44:26 2008
@@ -0,0 +1,46 @@
+/**
+ * Copyright 2008 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.regionserver;
+
+import java.io.IOException;
+
+/**
+ * Thrown when closing the write-ahead-log file fails; a kind of IOException.
+ * Package private: only used inside this package.
+ */
+class FailedLogCloseException extends IOException {
+  private static final long serialVersionUID = 1759152841462990925L;
+
+  public FailedLogCloseException() {
+    super();
+  }
+
+  public FailedLogCloseException(String arg0) {
+    super(arg0);
+  }
+
+  public FailedLogCloseException(Throwable arg0) {
+    super(arg0);
+  }
+
+  public FailedLogCloseException(String arg0, Throwable arg1) {
+    super(arg0, arg1);
+  }
+}
\ No newline at end of file

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/Flusher.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/Flusher.java?rev=705064&r1=705063&r2=705064&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/Flusher.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/Flusher.java Wed Oct 15 14:44:26 2008
@@ -178,7 +178,7 @@
       // is required. Currently the only way to do this is a restart of
       // the server. Abort because hdfs is probably bad (HBASE-644 is a case
       // where hdfs was bad but passed the hdfs check).
-      LOG.fatal("Replay of hlog required. Forcing server restart", ex);
+      LOG.fatal("Replay of hlog required. Forcing server shutdown", ex);
       server.abort();
       return false;
     } catch (IOException ex) {

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HLog.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HLog.java?rev=705064&r1=705063&r2=705064&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HLog.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HLog.java Wed Oct 15 14:44:26 2008
@@ -226,9 +226,10 @@
    * cacheFlushLock and then completeCacheFlush could be called which would wait
    * for the lock on this and consequently never release the cacheFlushLock
    *
+   * @throws FailedLogCloseException
    * @throws IOException
    */
-  public void rollWriter() throws IOException {
+  public void rollWriter() throws FailedLogCloseException, IOException {
     this.cacheFlushLock.lock();
     try {
       if (closed) {
@@ -237,7 +238,14 @@
       synchronized (updateLock) {
         if (this.writer != null) {
           // Close the current writer, get a new one.
-          this.writer.close();
+          try {
+            this.writer.close();
+          } catch (IOException e) {
+            // Failed close of log file.  Means we're losing edits.  For now,
+            // shut ourselves down to minimize loss.  Alternative is to try and
+            // keep going.  See HBASE-930.
+            throw new FailedLogCloseException("#" + this.filenum, e);
+          }
           Path p = computeFilename(old_filenum);
           if (LOG.isDebugEnabled()) {
             LOG.debug("Closing current log writer " + FSUtils.getPath(p));

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/LogRoller.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/LogRoller.java?rev=705064&r1=705063&r2=705064&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/LogRoller.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/LogRoller.java Wed Oct 15 14:44:26 2008
@@ -77,8 +77,11 @@
       try {
         LOG.info("Rolling hlog. Number of entries: " + server.getLog().getNumEntries());
         server.getLog().rollWriter();
+      } catch (FailedLogCloseException e) {
+        LOG.fatal("Forcing server shutdown", e);
+        server.abort();
       } catch (IOException ex) {
-        LOG.error("Log rolling failed",
+        LOG.error("Log rolling failed with ioe: ",
             RemoteExceptionHandler.checkIOException(ex));
         server.checkFileSystem();
       } catch (Exception ex) {

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/util/FSUtils.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/util/FSUtils.java?rev=705064&r1=705063&r2=705064&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/util/FSUtils.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/util/FSUtils.java Wed Oct 15 14:44:26 2008
@@ -72,10 +72,8 @@
     } catch (IOException e) {
       exception = RemoteExceptionHandler.checkIOException(e);
     }
-    
     try {
       fs.close();
-        
     } catch (Exception e) {
         LOG.error("file system close failed: ", e);
     }