You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by st...@apache.org on 2008/06/07 06:45:41 UTC

svn commit: r664280 - in /hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase: RegionHistorian.java regionserver/Flusher.java regionserver/HRegionServer.java

Author: stack
Date: Fri Jun  6 21:45:41 2008
New Revision: 664280

URL: http://svn.apache.org/viewvc?rev=664280&view=rev
Log:
HBASE-670 Historian deadlocks if regionserver is at global memory boundary and is hosting .META.; version 2

Modified:
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/RegionHistorian.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/Flusher.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/RegionHistorian.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/RegionHistorian.java?rev=664280&r1=664279&r2=664280&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/RegionHistorian.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/RegionHistorian.java Fri Jun  6 21:45:41 2008
@@ -180,11 +180,16 @@
    * Method to add a compaction event to the row in the .META table
    * @param info
    */
-  public void addRegionCompaction(HRegionInfo info,
-    @SuppressWarnings("unused") String timeTaken) {
-    // Disabled.  Noop.  If this regionserver is hosting the .META. AND is
-    // holding the reclaimMemcacheMemory global lock, we deadlock.  For now,
-    // just disable logging of flushes and compactions.
+  public void addRegionCompaction(final HRegionInfo info,
+      final String timeTaken) {
+    // While historian can not log flushes because it could deadlock the
+    // regionserver -- see the note in addRegionFlush -- there should be no
+    // such danger compacting; compactions are not allowed when
+    // Flusher#flushSomeRegions is run.
+    if (LOG.isDebugEnabled()) {
+      add(HistorianColumnKey.REGION_COMPACTION.key,
+        "Region compaction completed in " + timeTaken, info);
+    }
   }
 
   /**
@@ -194,8 +199,9 @@
   public void addRegionFlush(HRegionInfo info,
     @SuppressWarnings("unused") String timeTaken) {
     // Disabled.  Noop.  If this regionserver is hosting the .META. AND is
-    // holding the reclaimMemcacheMemory global lock, we deadlock.  For now,
-    // just disable logging of flushes and compactions.
+    // holding the reclaimMemcacheMemory global lock --
+    // see Flusher#flushSomeRegions --  we deadlock.  For now, just disable
+    // logging of flushes.
   }
 
   /**

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/Flusher.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/Flusher.java?rev=664280&r1=664279&r2=664280&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/Flusher.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/Flusher.java Fri Jun  6 21:45:41 2008
@@ -53,7 +53,6 @@
   private final long optionalFlushPeriod;
   private final HRegionServer server;
   private final ReentrantLock lock = new ReentrantLock();
-  private final Integer memcacheSizeLock = new Integer(0);  
   private long lastOptionalCheck = System.currentTimeMillis();
 
   protected final long globalMemcacheLimit;
@@ -126,23 +125,30 @@
     }
   }
   
-  /**
-   * Flush a region right away, while respecting concurrency with the async
-   * flushing that is always going on.
+  /*
+   * Flush a region.
    * 
    * @param region the region to be flushed
-   * @param removeFromQueue true if the region needs to be removed from the
-   * flush queue. False if called from the main run loop and true if called from
-   * flushSomeRegions to relieve memory pressure from the region server.
+   * @param removeFromQueue True if the region needs to be removed from the
+   * flush queue. False if called from the main flusher run loop and true if
+   * called from flushSomeRegions to relieve memory pressure from the region
+   * server.  If <code>true</code>, we are in a state of emergency; we are not
+   * taking on updates regionserver-wide, not until memory is flushed.  In this
+   * case, do not let a compaction run inline with blocked updates. Compactions
+   * can take a long time. Stopping compactions, there is a danger that number
+   * of flushes will overwhelm compaction on a busy server; we'll have to see.
+   * That compactions do not run when called out of flushSomeRegions means that
+   * compactions can be reported by the historian without danger of deadlock
+   * (HBASE-670).
    * 
    * <p>In the main run loop, regions have already been removed from the flush
    * queue, and if this method is called for the relief of memory pressure,
    * this may not be necessarily true. We want to avoid trying to remove 
-   * region from the queue because if it has already been removed, it reqires a
+   * region from the queue because if it has already been removed, it requires a
    * sequential scan of the queue to determine that it is not in the queue.
    * 
    * <p>If called from flushSomeRegions, the region may be in the queue but
-   * it may have been determined that the region had a significant amout of 
+   * it may have been determined that the region had a significant amount of 
    * memory in use and needed to be flushed to relieve memory pressure. In this
    * case, its flush may preempt the pending request in the queue, and if so,
    * it needs to be removed from the queue to avoid flushing the region multiple
@@ -163,7 +169,9 @@
       }
       lock.lock();
       try {
-        if (region.flushcache()) {
+        // See javadoc comment above for removeFromQueue on why we do not
+        // compact if removeFromQueue is true.
+        if (region.flushcache() && !removeFromQueue) {
           server.compactSplitThread.compactionRequested(region);
         }
       } catch (DroppedSnapshotException ex) {
@@ -242,38 +250,26 @@
    * amount of memcache consumption.
    */
   public void reclaimMemcacheMemory() {
-    synchronized (memcacheSizeLock) {
-      if (server.getGlobalMemcacheSize() >= globalMemcacheLimit) {
-        flushSomeRegions();
-      }
+    if (server.getGlobalMemcacheSize() >= globalMemcacheLimit) {
+      flushSomeRegions();
     }
   }
-  
-  private void flushSomeRegions() {
-    // we'll sort the regions in reverse
-    SortedMap<Long, HRegion> sortedRegions = new TreeMap<Long, HRegion>(
-      new Comparator<Long>() {
-        public int compare(Long a, Long b) {
-          return -1 * a.compareTo(b);
-        }
-      }
-    );
-    
-    // copy over all the regions
-    for (HRegion region : server.onlineRegions.values()) {
-      sortedRegions.put(region.memcacheSize.get(), region);
-    }
-    
+
+  /*
+   * Emergency!  Need to flush memory.  While running this method all updates
+   * to this regionserver are blocked.
+   */
+  private synchronized void flushSomeRegions() {
+    SortedMap<Long, HRegion> m =
+      this.server.getCopyOfOnlineRegionsSortedBySize();
     // keep flushing until we hit the low water mark
     while (server.getGlobalMemcacheSize() >= globalMemcacheLimitLowMark) {
       // flush the region with the biggest memcache
-      HRegion biggestMemcacheRegion = 
-        sortedRegions.remove(sortedRegions.firstKey());
+      HRegion biggestMemcacheRegion = m.remove(m.firstKey());
       if (!flushRegion(biggestMemcacheRegion, true)) {
         // Something bad happened - give up.
         break;
       }
     }
   }
-  
 }
\ No newline at end of file

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java?rev=664280&r1=664279&r2=664280&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java Fri Jun  6 21:45:41 2008
@@ -28,6 +28,7 @@
 import java.util.Arrays;
 import java.util.Collection;
 import java.util.Collections;
+import java.util.Comparator;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Iterator;
@@ -35,6 +36,7 @@
 import java.util.Map;
 import java.util.Random;
 import java.util.Set;
+import java.util.SortedMap;
 import java.util.TreeMap;
 import java.util.TreeSet;
 import java.util.concurrent.BlockingQueue;
@@ -1305,6 +1307,27 @@
   public Collection<HRegion> getOnlineRegions() {
     return Collections.unmodifiableCollection(onlineRegions.values());
   }
+
+  /**
+   * @return A new Map of online regions sorted by region size with the first
+   * entry being the biggest.
+   */
+  public SortedMap<Long, HRegion> getCopyOfOnlineRegionsSortedBySize() {
+    // we'll sort the regions in reverse
+    SortedMap<Long, HRegion> sortedRegions = new TreeMap<Long, HRegion>(
+        new Comparator<Long>() {
+          public int compare(Long a, Long b) {
+            return -1 * a.compareTo(b);
+          }
+        });
+    // Copy over all regions. Regions are sorted by size with biggest first.
+    synchronized (this.onlineRegions) {
+      for (HRegion region : this.onlineRegions.values()) {
+        sortedRegions.put(region.memcacheSize.get(), region);
+      }
+    }
+    return sortedRegions;
+  }
   
   /**
    * @param regionName