You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by st...@apache.org on 2008/11/25 22:50:19 UTC
svn commit: r720617 - in /hadoop/hbase/trunk: CHANGES.txt
src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
Author: stack
Date: Tue Nov 25 13:50:19 2008
New Revision: 720617
URL: http://svn.apache.org/viewvc?rev=720617&view=rev
Log:
HBASE-1020 Regionserver OOME handler should dump vital stats
Modified:
hadoop/hbase/trunk/CHANGES.txt
hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
Modified: hadoop/hbase/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/CHANGES.txt?rev=720617&r1=720616&r2=720617&view=diff
==============================================================================
--- hadoop/hbase/trunk/CHANGES.txt (original)
+++ hadoop/hbase/trunk/CHANGES.txt Tue Nov 25 13:50:19 2008
@@ -140,6 +140,7 @@
HBASE-972 Update hbase trunk to use released hadoop 0.19.0
HBASE-1022 Add storefile index size to hbase metrics
HBASE-1026 Tests in mapred are failing
+ HBASE-1020 Regionserver OOME handler should dump vital stats
NEW FEATURES
HBASE-875 Use MurmurHash instead of JenkinsHash [in bloomfilters]
Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java?rev=720617&r1=720616&r2=720617&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java Tue Nov 25 13:50:19 2008
@@ -434,10 +434,8 @@
housekeeping();
sleeper.sleep(lastMsg);
} // for
- } catch (OutOfMemoryError error) {
- abort();
- LOG.fatal("Ran out of memory", error);
} catch (Throwable t) {
+ checkOOME(t);
LOG.fatal("Unhandled exception. Aborting...", t);
abort();
}
@@ -550,6 +548,7 @@
isOnline = true;
} catch (IOException e) {
this.stopRequested.set(true);
+ checkOOME(e);
isOnline = false;
e = RemoteExceptionHandler.checkIOException(e);
LOG.fatal("Failed init", e);
@@ -558,6 +557,22 @@
throw ex;
}
}
+
+ /*
+ * Check if e is an OOME, or was directly caused by one, and if so, call abort.
+ * @param e Throwable to check.
+ * @return True if we OOME'd and are aborting.
+ */
+ private boolean checkOOME(final Throwable e) {
+ boolean aborting = false;
+ if (e instanceof OutOfMemoryError ||
+ (e.getCause()!= null && e.getCause() instanceof OutOfMemoryError)) {
+ LOG.fatal("OOME, aborting.", e);
+ abort();
+ aborting = true;
+ }
+ return aborting;
+ }
/*
* Thread to shutdown the region server in an orderly manner. This thread
@@ -591,7 +606,7 @@
*/
private static class MajorCompactionChecker extends Chore {
private final HRegionServer instance;
-
+
MajorCompactionChecker(final HRegionServer h,
final int sleepTime, final AtomicBoolean stopper) {
super(sleepTime, stopper);
@@ -800,8 +815,9 @@
* from under hbase or we OOME.
*/
public void abort() {
- reservedSpace.clear();
this.abortRequested = true;
+ this.reservedSpace.clear();
+ LOG.info("Dump of metrics: " + this.metrics.toString());
stop();
}
@@ -892,7 +908,6 @@
*/
void reportSplit(HRegionInfo oldRegion, HRegionInfo newRegionA,
HRegionInfo newRegionB) {
-
outboundMsgs.add(new HMsg(HMsg.Type.MSG_REPORT_SPLIT, oldRegion,
(oldRegion.getRegionNameAsString() + " split; daughters: " +
newRegionA.getRegionNameAsString() + ", " +
@@ -1017,6 +1032,7 @@
}
}
} catch(Throwable t) {
+ checkOOME(t);
LOG.fatal("Unhandled exception", t);
} finally {
LOG.info("worker thread exiting");
@@ -1039,8 +1055,9 @@
this.compactSplitThread.
compactionRequested(region, "Region open check");
} catch (IOException e) {
- LOG.error("error opening region " + regionInfo.getRegionNameAsString(), e);
-
+ checkOOME(e);
+ LOG.error("error opening region " + regionInfo.getRegionNameAsString(),
+ e);
// TODO: add an extra field in HRegionInfo to indicate that there is
// an error. We can't do that now because that would be an incompatible
// change that would require a migration
@@ -1113,6 +1130,7 @@
LOG.error("error closing region " +
Bytes.toString(region.getRegionName()),
RemoteExceptionHandler.checkIOException(e));
+ checkOOME(e);
}
}
return regionsToClose;
@@ -1233,6 +1251,7 @@
result.putAll(map);
return new RowResult(row, result);
} catch (IOException e) {
+ checkOOME(e);
checkFileSystem();
throw e;
}
@@ -1250,6 +1269,7 @@
RowResult rr = region.getClosestRowBefore(row, columnFamily);
return rr;
} catch (IOException e) {
+ checkOOME(e);
checkFileSystem();
throw e;
}
@@ -1286,6 +1306,7 @@
}
return resultSets.toArray(new RowResult[resultSets.size()]);
} catch (IOException e) {
+ checkOOME(e);
checkFileSystem();
throw e;
}
@@ -1304,10 +1325,8 @@
try {
cacheFlusher.reclaimMemcacheMemory();
region.batchUpdate(b, getLockFromId(b.getRowLock()));
- } catch (OutOfMemoryError error) {
- abort();
- LOG.fatal("Ran out of memory", error);
} catch (IOException e) {
+ checkOOME(e);
checkFileSystem();
throw e;
}
@@ -1327,14 +1346,12 @@
locks[i] = getLockFromId(b[i].getRowLock());
region.batchUpdate(b[i], locks[i]);
}
- } catch (OutOfMemoryError error) {
- abort();
- LOG.fatal("Ran out of memory", error);
} catch(WrongRegionException ex) {
return i;
} catch (NotServingRegionException ex) {
return i;
} catch (IOException e) {
+ checkOOME(e);
checkFileSystem();
throw e;
}
@@ -1397,7 +1414,8 @@
return scannerId;
} catch (IOException e) {
LOG.error("Error opening scanner (fsOk: " + this.fsOk + ")",
- RemoteExceptionHandler.checkIOException(e));
+ RemoteExceptionHandler.checkIOException(e));
+ checkOOME(e);
checkFileSystem();
throw e;
}
@@ -1430,6 +1448,9 @@
s.close();
this.leases.cancelLease(scannerName);
} catch (IOException e) {
+ // TODO: Should we even be returning an exception out of a close?
+ // What can the client do with an exception in close?
+ checkOOME(e);
checkFileSystem();
throw e;
}
@@ -1527,7 +1548,8 @@
return lockId;
} catch (IOException e) {
LOG.error("Error obtaining row lock (fsOk: " + this.fsOk + ")",
- RemoteExceptionHandler.checkIOException(e));
+ RemoteExceptionHandler.checkIOException(e));
+ checkOOME(e);
checkFileSystem();
throw e;
}
@@ -1842,7 +1864,7 @@
}
public long getProtocolVersion(final String protocol,
- @SuppressWarnings("unused") final long clientVersion)
+ final long clientVersion)
throws IOException {
if (protocol.equals(HRegionInterface.class.getName())) {
return HBaseRPCProtocolVersion.versionID;