You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by ap...@apache.org on 2009/02/12 09:58:24 UTC

svn commit: r743662 - in /hadoop/hbase/branches/0.19: CHANGES.txt src/java/org/apache/hadoop/hbase/ipc/HBaseRPCErrorHandler.java src/java/org/apache/hadoop/hbase/ipc/HBaseServer.java src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java

Author: apurtell
Date: Thu Feb 12 08:58:23 2009
New Revision: 743662

URL: http://svn.apache.org/viewvc?rev=743662&view=rev
Log:
HBASE-1198 OOME in IPC server does not trigger abort behavior

Added:
    hadoop/hbase/branches/0.19/src/java/org/apache/hadoop/hbase/ipc/HBaseRPCErrorHandler.java
Modified:
    hadoop/hbase/branches/0.19/CHANGES.txt
    hadoop/hbase/branches/0.19/src/java/org/apache/hadoop/hbase/ipc/HBaseServer.java
    hadoop/hbase/branches/0.19/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java

Modified: hadoop/hbase/branches/0.19/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.19/CHANGES.txt?rev=743662&r1=743661&r2=743662&view=diff
==============================================================================
--- hadoop/hbase/branches/0.19/CHANGES.txt (original)
+++ hadoop/hbase/branches/0.19/CHANGES.txt Thu Feb 12 08:58:23 2009
@@ -9,6 +9,7 @@
                names (Jonathan Gray via Andrew Purtell)
    HBASE-1190  TableInputFormatBase with row filters scan too far (Dave
                Latham via Andrew Purtell)
+   HBASE-1198  OOME in IPC server does not trigger abort behavior
 
   IMPROVEMENTS
    HBASE-845   HCM.isTableEnabled doesn't really tell if it is, or not

Added: hadoop/hbase/branches/0.19/src/java/org/apache/hadoop/hbase/ipc/HBaseRPCErrorHandler.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.19/src/java/org/apache/hadoop/hbase/ipc/HBaseRPCErrorHandler.java?rev=743662&view=auto
==============================================================================
--- hadoop/hbase/branches/0.19/src/java/org/apache/hadoop/hbase/ipc/HBaseRPCErrorHandler.java (added)
+++ hadoop/hbase/branches/0.19/src/java/org/apache/hadoop/hbase/ipc/HBaseRPCErrorHandler.java Thu Feb 12 08:58:23 2009
@@ -0,0 +1,31 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.ipc;
+
+/**
+ * An interface for calling out of RPC for error conditions.
+ */
+public interface HBaseRPCErrorHandler {
+	/**
+	 * Take action in the event of an OutOfMemoryError.
+	 * @param e the throwable
+	 * @return true if the server should be shut down
+	 */
+	public boolean checkOOME(final Throwable e) ;
+}

Modified: hadoop/hbase/branches/0.19/src/java/org/apache/hadoop/hbase/ipc/HBaseServer.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.19/src/java/org/apache/hadoop/hbase/ipc/HBaseServer.java?rev=743662&r1=743661&r2=743662&view=diff
==============================================================================
--- hadoop/hbase/branches/0.19/src/java/org/apache/hadoop/hbase/ipc/HBaseServer.java (original)
+++ hadoop/hbase/branches/0.19/src/java/org/apache/hadoop/hbase/ipc/HBaseServer.java Thu Feb 12 08:58:23 2009
@@ -152,6 +152,7 @@
   private Responder responder = null;
   private int numConnections = 0;
   private Handler[] handlers = null;
+  private HBaseRPCErrorHandler errorHandler = null;
 
   /**
    * A convenience method to bind to a given address and report 
@@ -313,13 +314,22 @@
             key = null;
           }
         } catch (OutOfMemoryError e) {
-          // we can run out of memory if we have too many threads
-          // log the event and sleep for a minute and give 
-          // some thread(s) a chance to finish
-          LOG.warn("Out of Memory in server select", e);
-          closeCurrentConnection(key, e);
-          cleanupConnections(true);
-          try { Thread.sleep(60000); } catch (Exception ie) {}
+          if (errorHandler != null) {
+            if (errorHandler.checkOOME(e)) {
+              LOG.info(getName() + ": exiting on OOME");
+              closeCurrentConnection(key, e);
+              cleanupConnections(true);
+              return;
+            }
+          } else {
+            // we can run out of memory if we have too many threads
+            // log the event and sleep for a minute and give 
+            // some thread(s) a chance to finish
+            LOG.warn("Out of Memory in server select", e);
+            closeCurrentConnection(key, e);
+            cleanupConnections(true);
+            try { Thread.sleep(60000); } catch (Exception ie) {}
+      }
         } catch (InterruptedException e) {
           if (running) {                          // unexpected -- log it
             LOG.info(getName() + " caught: " +
@@ -364,7 +374,7 @@
       return (InetSocketAddress)acceptChannel.socket().getLocalSocketAddress();
     }
     
-    void doAccept(SelectionKey key) throws IOException,  OutOfMemoryError {
+    void doAccept(SelectionKey key) throws IOException, OutOfMemoryError {
       Connection c = null;
       ServerSocketChannel server = (ServerSocketChannel) key.channel();
       // accept up to 10 connections
@@ -501,13 +511,20 @@
             }
           }
         } catch (OutOfMemoryError e) {
-          //
-          // we can run out of memory if we have too many threads
-          // log the event and sleep for a minute and give
-          // some thread(s) a chance to finish
-          //
-          LOG.warn("Out of Memory in server select", e);
-          try { Thread.sleep(60000); } catch (Exception ie) {}
+          if (errorHandler != null) {
+            if (errorHandler.checkOOME(e)) {
+              LOG.info(getName() + ": exiting on OOME");
+              return;
+            }
+          } else {
+            //
+            // we can run out of memory if we have too many threads
+            // log the event and sleep for a minute and give
+            // some thread(s) a chance to finish
+            //
+            LOG.warn("Out of Memory in server select", e);
+            try { Thread.sleep(60000); } catch (Exception ie) {}
+      }
         } catch (Exception e) {
           LOG.warn("Exception in Responder " + 
                    StringUtils.stringifyException(e));
@@ -926,6 +943,16 @@
             LOG.info(getName() + " caught: " +
                      StringUtils.stringifyException(e));
           }
+        } catch (OutOfMemoryError e) {
+          if (errorHandler != null) {
+            if (errorHandler.checkOOME(e)) {
+              LOG.info(getName() + ": exiting on OOME");
+              return;
+            }
+          } else {
+              // rethrow if no handler
+              throw e;
+          }
         } catch (Exception e) {
           LOG.info(getName() + " caught: " +
                    StringUtils.stringifyException(e));
@@ -1060,8 +1087,15 @@
   public int getCallQueueLen() {
     return callQueue.size();
   }
-  
-  
+
+  /**
+   * Set the handler for calling out of RPC for error conditions.
+   * @param handler the handler implementation
+   */
+  public void setErrorHandler(HBaseRPCErrorHandler handler) {
+    this.errorHandler = handler;
+  }      
+
   /**
    * When the read or write buffer size is larger than this limit, i/o will be 
    * done in chunks of this size. Most RPC requests and responses would be

Modified: hadoop/hbase/branches/0.19/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.19/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java?rev=743662&r1=743661&r2=743662&view=diff
==============================================================================
--- hadoop/hbase/branches/0.19/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (original)
+++ hadoop/hbase/branches/0.19/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java Thu Feb 12 08:58:23 2009
@@ -88,6 +88,7 @@
 import org.apache.hadoop.hbase.io.HbaseMapWritable;
 import org.apache.hadoop.hbase.io.RowResult;
 import org.apache.hadoop.hbase.ipc.HBaseRPC;
+import org.apache.hadoop.hbase.ipc.HBaseRPCErrorHandler;
 import org.apache.hadoop.hbase.ipc.HBaseRPCProtocolVersion;
 import org.apache.hadoop.hbase.ipc.HBaseServer;
 import org.apache.hadoop.hbase.ipc.HMasterRegionInterface;
@@ -107,7 +108,7 @@
  * HRegionServer makes a set of HRegions available to clients.  It checks in with
  * the HMaster. There are many HRegionServers in a single HBase deployment.
  */
-public class HRegionServer implements HConstants, HRegionInterface, Runnable {
+public class HRegionServer implements HConstants, HRegionInterface, HBaseRPCErrorHandler, Runnable {
   static final Log LOG = LogFactory.getLog(HRegionServer.class);
   private static final HMsg REPORT_EXITING = new HMsg(Type.MSG_REPORT_EXITING);
   private static final HMsg REPORT_QUIESCED = new HMsg(Type.MSG_REPORT_QUIESCED);
@@ -271,6 +272,7 @@
     this.server = HBaseRPC.getServer(this, address.getBindAddress(), 
       address.getPort(), conf.getInt("hbase.regionserver.handler.count", 10),
       false, conf);
+    this.server.setErrorHandler(this);
     // Address is givin a default IP for the moment. Will be changed after
     // calling the master.
     this.serverInfo = new HServerInfo(new HServerAddress(
@@ -704,7 +706,7 @@
    * @param e
    * @return True if we OOME'd and are aborting.
    */
-  private boolean checkOOME(final Throwable e) {
+  public boolean checkOOME(final Throwable e) {
     boolean stop = false;
     if (e instanceof OutOfMemoryError ||
       (e.getCause() != null && e.getCause() instanceof OutOfMemoryError) ||