You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by st...@apache.org on 2008/11/25 22:50:19 UTC

svn commit: r720617 - in /hadoop/hbase/trunk: CHANGES.txt src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java

Author: stack
Date: Tue Nov 25 13:50:19 2008
New Revision: 720617

URL: http://svn.apache.org/viewvc?rev=720617&view=rev
Log:
HBASE-1020 Regionserver OOME handler should dump vital stats

Modified:
    hadoop/hbase/trunk/CHANGES.txt
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java

Modified: hadoop/hbase/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/CHANGES.txt?rev=720617&r1=720616&r2=720617&view=diff
==============================================================================
--- hadoop/hbase/trunk/CHANGES.txt (original)
+++ hadoop/hbase/trunk/CHANGES.txt Tue Nov 25 13:50:19 2008
@@ -140,6 +140,7 @@
    HBASE-972   Update hbase trunk to use released hadoop 0.19.0
    HBASE-1022  Add storefile index size to hbase metrics
    HBASE-1026  Tests in mapred are failing
+   HBASE-1020  Regionserver OOME handler should dump vital stats
  
   NEW FEATURES
    HBASE-875   Use MurmurHash instead of JenkinsHash [in bloomfilters]

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java?rev=720617&r1=720616&r2=720617&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java Tue Nov 25 13:50:19 2008
@@ -434,10 +434,8 @@
         housekeeping();
         sleeper.sleep(lastMsg);
       } // for
-    } catch (OutOfMemoryError error) {
-      abort();
-      LOG.fatal("Ran out of memory", error);
     } catch (Throwable t) {
+      checkOOME(t);
       LOG.fatal("Unhandled exception. Aborting...", t);
       abort();
     }
@@ -550,6 +548,7 @@
       isOnline = true;
     } catch (IOException e) {
       this.stopRequested.set(true);
+      checkOOME(e);
       isOnline = false;
       e = RemoteExceptionHandler.checkIOException(e); 
       LOG.fatal("Failed init", e);
@@ -558,6 +557,22 @@
       throw ex;
     }
   }
+  
+  /*
+   * If e, or its immediate cause, is an OutOfMemoryError, log fatal and abort.
+   * @param e Throwable to test (non-null); only e and e.getCause() are examined.
+   * @return True if we OOME'd and are aborting.
+   */
+  private boolean checkOOME(final Throwable e) {
+    boolean aborting = false;
+    if (e instanceof OutOfMemoryError ||
+        (e.getCause()!= null && e.getCause() instanceof OutOfMemoryError)) {
+      LOG.fatal("OOME, aborting.", e);
+      abort();
+      aborting = true;
+    }
+    return aborting;
+  }
 
   /*
    * Thread to shutdown the region server in an orderly manner.  This thread
@@ -591,7 +606,7 @@
    */
   private static class MajorCompactionChecker extends Chore {
     private final HRegionServer instance;
-    
+
     MajorCompactionChecker(final HRegionServer h,
         final int sleepTime, final AtomicBoolean stopper) {
       super(sleepTime, stopper);
@@ -800,8 +815,9 @@
    * from under hbase or we OOME.
    */
   public void abort() {
-    reservedSpace.clear();
     this.abortRequested = true;
+    this.reservedSpace.clear();
+    LOG.info("Dump of metrics: " + this.metrics.toString());
     stop();
   }
 
@@ -892,7 +908,6 @@
    */
   void reportSplit(HRegionInfo oldRegion, HRegionInfo newRegionA,
       HRegionInfo newRegionB) {
-
     outboundMsgs.add(new HMsg(HMsg.Type.MSG_REPORT_SPLIT, oldRegion,
       (oldRegion.getRegionNameAsString() + " split; daughters: " +
         newRegionA.getRegionNameAsString() + ", " +
@@ -1017,6 +1032,7 @@
           }
         }
       } catch(Throwable t) {
+        checkOOME(t);
         LOG.fatal("Unhandled exception", t);
       } finally {
         LOG.info("worker thread exiting");
@@ -1039,8 +1055,9 @@
         this.compactSplitThread.
           compactionRequested(region, "Region open check");
       } catch (IOException e) {
-        LOG.error("error opening region " + regionInfo.getRegionNameAsString(), e);
-
+        checkOOME(e);
+        LOG.error("error opening region " + regionInfo.getRegionNameAsString(),
+          e);
         // TODO: add an extra field in HRegionInfo to indicate that there is
         // an error. We can't do that now because that would be an incompatible
         // change that would require a migration
@@ -1113,6 +1130,7 @@
         LOG.error("error closing region " +
             Bytes.toString(region.getRegionName()),
           RemoteExceptionHandler.checkIOException(e));
+        checkOOME(e);
       }
     }
     return regionsToClose;
@@ -1233,6 +1251,7 @@
       result.putAll(map);
       return new RowResult(row, result);
     } catch (IOException e) {
+      checkOOME(e);
       checkFileSystem();
       throw e;
     }
@@ -1250,6 +1269,7 @@
       RowResult rr = region.getClosestRowBefore(row, columnFamily);
       return rr;
     } catch (IOException e) {
+      checkOOME(e);
       checkFileSystem();
       throw e;
     }
@@ -1286,6 +1306,7 @@
       }
       return resultSets.toArray(new RowResult[resultSets.size()]);
     } catch (IOException e) {
+      checkOOME(e);
       checkFileSystem();
       throw e;
     }
@@ -1304,10 +1325,8 @@
     try {
       cacheFlusher.reclaimMemcacheMemory();
       region.batchUpdate(b, getLockFromId(b.getRowLock()));
-    } catch (OutOfMemoryError error) {
-      abort();
-      LOG.fatal("Ran out of memory", error);
     } catch (IOException e) {
+      checkOOME(e);
       checkFileSystem();
       throw e;
     }
@@ -1327,14 +1346,12 @@
         locks[i] = getLockFromId(b[i].getRowLock());
         region.batchUpdate(b[i], locks[i]);
       }
-    } catch (OutOfMemoryError error) {
-      abort();
-      LOG.fatal("Ran out of memory", error);
     } catch(WrongRegionException ex) {
       return i;
     } catch (NotServingRegionException ex) {
       return i;
     } catch (IOException e) {
+      checkOOME(e);
       checkFileSystem();
       throw e;
     }
@@ -1397,7 +1414,8 @@
       return scannerId;
     } catch (IOException e) {
       LOG.error("Error opening scanner (fsOk: " + this.fsOk + ")",
-          RemoteExceptionHandler.checkIOException(e));
+        RemoteExceptionHandler.checkIOException(e));
+      checkOOME(e);
       checkFileSystem();
       throw e;
     }
@@ -1430,6 +1448,9 @@
       s.close();
       this.leases.cancelLease(scannerName);
     } catch (IOException e) {
+      // TODO: Should we even be returning an exception out of a close?
+      // What can the client do with an exception in close?
+      checkOOME(e);
       checkFileSystem();
       throw e;
     }
@@ -1527,7 +1548,8 @@
       return lockId;
     } catch (IOException e) {
       LOG.error("Error obtaining row lock (fsOk: " + this.fsOk + ")",
-          RemoteExceptionHandler.checkIOException(e));
+        RemoteExceptionHandler.checkIOException(e));
+      checkOOME(e);
       checkFileSystem();
       throw e;
     }
@@ -1842,7 +1864,7 @@
   }
 
   public long getProtocolVersion(final String protocol, 
-      @SuppressWarnings("unused") final long clientVersion)
+      final long clientVersion)
   throws IOException {  
     if (protocol.equals(HRegionInterface.class.getName())) {
       return HBaseRPCProtocolVersion.versionID;