You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by ap...@apache.org on 2009/02/12 09:56:49 UTC
svn commit: r743660 - in /hadoop/hbase/trunk: CHANGES.txt
src/java/org/apache/hadoop/hbase/ipc/HBaseRPCErrorHandler.java
src/java/org/apache/hadoop/hbase/ipc/HBaseServer.java
src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
Author: apurtell
Date: Thu Feb 12 08:56:49 2009
New Revision: 743660
URL: http://svn.apache.org/viewvc?rev=743660&view=rev
Log:
HBASE-1198 OOME in IPC server does not trigger abort behavior
Added:
hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/ipc/HBaseRPCErrorHandler.java
Modified:
hadoop/hbase/trunk/CHANGES.txt
hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/ipc/HBaseServer.java
hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
Modified: hadoop/hbase/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/CHANGES.txt?rev=743660&r1=743659&r2=743660&view=diff
==============================================================================
--- hadoop/hbase/trunk/CHANGES.txt (original)
+++ hadoop/hbase/trunk/CHANGES.txt Thu Feb 12 08:56:49 2009
@@ -21,6 +21,7 @@
name (Jonathan Gray via Andrew Purtell)
HBASE-1190 TableInputFormatBase with row filters scan too far (Dave
Latham via Andrew Purtell)
+ HBASE-1198 OOME in IPC server does not trigger abort behavior
IMPROVEMENTS
HBASE-1089 Add count of regions on filesystem to master UI; add percentage
Added: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/ipc/HBaseRPCErrorHandler.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/ipc/HBaseRPCErrorHandler.java?rev=743660&view=auto
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/ipc/HBaseRPCErrorHandler.java (added)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/ipc/HBaseRPCErrorHandler.java Thu Feb 12 08:56:49 2009
@@ -0,0 +1,31 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.ipc;
+
+/**
+ * An interface for calling out of RPC for error conditions.
+ */
+public interface HBaseRPCErrorHandler {
+ /**
+ * Take action in the event of an OutOfMemoryError.
+ * @param e the throwable
+ * @return true if the server should be shut down
+ */
+ public boolean checkOOME(final Throwable e) ;
+}
Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/ipc/HBaseServer.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/ipc/HBaseServer.java?rev=743660&r1=743659&r2=743660&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/ipc/HBaseServer.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/ipc/HBaseServer.java Thu Feb 12 08:56:49 2009
@@ -152,6 +152,7 @@
private Responder responder = null;
private int numConnections = 0;
private Handler[] handlers = null;
+ private HBaseRPCErrorHandler errorHandler = null;
/**
* A convenience method to bind to a given address and report
@@ -313,13 +314,22 @@
key = null;
}
} catch (OutOfMemoryError e) {
- // we can run out of memory if we have too many threads
- // log the event and sleep for a minute and give
- // some thread(s) a chance to finish
- LOG.warn("Out of Memory in server select", e);
- closeCurrentConnection(key, e);
- cleanupConnections(true);
- try { Thread.sleep(60000); } catch (Exception ie) {}
+ if (errorHandler != null) {
+ if (errorHandler.checkOOME(e)) {
+ LOG.info(getName() + ": exiting on OOME");
+ closeCurrentConnection(key, e);
+ cleanupConnections(true);
+ return;
+ }
+ } else {
+ // we can run out of memory if we have too many threads
+ // log the event and sleep for a minute and give
+ // some thread(s) a chance to finish
+ LOG.warn("Out of Memory in server select", e);
+ closeCurrentConnection(key, e);
+ cleanupConnections(true);
+ try { Thread.sleep(60000); } catch (Exception ie) {}
+ }
} catch (InterruptedException e) {
if (running) { // unexpected -- log it
LOG.info(getName() + " caught: " +
@@ -364,7 +374,7 @@
return (InetSocketAddress)acceptChannel.socket().getLocalSocketAddress();
}
- void doAccept(SelectionKey key) throws IOException, OutOfMemoryError {
+ void doAccept(SelectionKey key) throws IOException, OutOfMemoryError {
Connection c = null;
ServerSocketChannel server = (ServerSocketChannel) key.channel();
// accept up to 10 connections
@@ -501,13 +511,20 @@
}
}
} catch (OutOfMemoryError e) {
- //
- // we can run out of memory if we have too many threads
- // log the event and sleep for a minute and give
- // some thread(s) a chance to finish
- //
- LOG.warn("Out of Memory in server select", e);
- try { Thread.sleep(60000); } catch (Exception ie) {}
+ if (errorHandler != null) {
+ if (errorHandler.checkOOME(e)) {
+ LOG.info(getName() + ": exiting on OOME");
+ return;
+ }
+ } else {
+ //
+ // we can run out of memory if we have too many threads
+ // log the event and sleep for a minute and give
+ // some thread(s) a chance to finish
+ //
+ LOG.warn("Out of Memory in server select", e);
+ try { Thread.sleep(60000); } catch (Exception ie) {}
+ }
} catch (Exception e) {
LOG.warn("Exception in Responder " +
StringUtils.stringifyException(e));
@@ -926,6 +943,16 @@
LOG.info(getName() + " caught: " +
StringUtils.stringifyException(e));
}
+ } catch (OutOfMemoryError e) {
+ if (errorHandler != null) {
+ if (errorHandler.checkOOME(e)) {
+ LOG.info(getName() + ": exiting on OOME");
+ return;
+ }
+ } else {
+ // rethrow if no handler
+ throw e;
+ }
} catch (Exception e) {
LOG.info(getName() + " caught: " +
StringUtils.stringifyException(e));
@@ -1060,8 +1087,15 @@
public int getCallQueueLen() {
return callQueue.size();
}
-
-
+
+ /**
+ * Set the handler for calling out of RPC for error conditions.
+ * @param handler the handler implementation
+ */
+ public void setErrorHandler(HBaseRPCErrorHandler handler) {
+ this.errorHandler = handler;
+ }
+
/**
* When the read or write buffer size is larger than this limit, i/o will be
* done in chunks of this size. Most RPC requests and responses would be
Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java?rev=743660&r1=743659&r2=743660&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java Thu Feb 12 08:56:49 2009
@@ -88,6 +88,7 @@
import org.apache.hadoop.hbase.io.HbaseMapWritable;
import org.apache.hadoop.hbase.io.RowResult;
import org.apache.hadoop.hbase.ipc.HBaseRPC;
+import org.apache.hadoop.hbase.ipc.HBaseRPCErrorHandler;
import org.apache.hadoop.hbase.ipc.HBaseRPCProtocolVersion;
import org.apache.hadoop.hbase.ipc.HBaseServer;
import org.apache.hadoop.hbase.ipc.HMasterRegionInterface;
@@ -108,7 +109,7 @@
* HRegionServer makes a set of HRegions available to clients. It checks in with
* the HMaster. There are many HRegionServers in a single HBase deployment.
*/
-public class HRegionServer implements HConstants, HRegionInterface, Runnable {
+public class HRegionServer implements HConstants, HRegionInterface, HBaseRPCErrorHandler, Runnable {
static final Log LOG = LogFactory.getLog(HRegionServer.class);
private static final HMsg REPORT_EXITING = new HMsg(Type.MSG_REPORT_EXITING);
private static final HMsg REPORT_QUIESCED = new HMsg(Type.MSG_REPORT_QUIESCED);
@@ -274,6 +275,7 @@
this.server = HBaseRPC.getServer(this, address.getBindAddress(),
address.getPort(), conf.getInt("hbase.regionserver.handler.count", 10),
false, conf);
+ this.server.setErrorHandler(this);
// Address is given a default IP for the moment. Will be changed after
// calling the master.
this.serverInfo = new HServerInfo(new HServerAddress(
@@ -718,7 +720,7 @@
* @param e
* @return True if we OOME'd and are aborting.
*/
- private boolean checkOOME(final Throwable e) {
+ public boolean checkOOME(final Throwable e) {
boolean stop = false;
if (e instanceof OutOfMemoryError ||
(e.getCause() != null && e.getCause() instanceof OutOfMemoryError) ||