You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by li...@apache.org on 2013/06/29 21:51:03 UTC

svn commit: r1497992 - in /hbase/branches/0.89-fb/src: main/java/org/apache/hadoop/hbase/regionserver/ main/java/org/apache/hadoop/hbase/regionserver/wal/ test/java/org/apache/hadoop/hbase/regionserver/wal/

Author: liyin
Date: Sat Jun 29 18:18:46 2013
New Revision: 1497992

URL: http://svn.apache.org/r1497992
Log:
[0.89-fb] [HBASE-8228] Change the order of locks for HRegion.updatesLock and HLog.cacheFlushLock during a cache flush

Author: aaiyer

Summary:
We have seen cases where HLog.startCacheFlush() may block, waiting for a
compaction + log roll to complete; while holding the updatesLock for a region.

The waiting is fine. But, it is unacceptable to block all writes to the region
if the lock cannot be granted.

A long term fix to this is to get rid of the cacheFlushLock, since we no
longer write the complete flush entry in the log, and it can be deprecated.

For now, just change the order of locks for HRegion.updatesLock and HLog.cacheFlushLock during a cache flush

Test Plan: test suite; deploy/run on shadow

Reviewers: liyintang

Reviewed By: liyintang

CC: hbase-eng@, adela

Differential Revision: https://phabricator.fb.com/D867128

Modified:
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLog.java
    hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestHLog.java
    hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestWALReplay.java

Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java?rev=1497992&r1=1497991&r2=1497992&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java (original)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java Sat Jun 29 18:18:46 2013
@@ -1386,6 +1386,12 @@ public class HRegion implements HeapSize
     // rows then)
     status.setStatus("Obtaining lock to block concurrent updates");
     long t0, t1;
+
+    if (wal != null) {
+      // We will ask the WAL to avoid rolling the log, until the flush is done.
+      // This may stall. So, doing this before we get the updatesLock.
+        wal.startCacheFlush();
+    }
     this.updatesLock.writeLock().lock();
     t0 = EnvironmentEdgeManager.currentTimeMillis();
     status.setStatus("Preparing to flush by snapshotting stores");
@@ -1395,7 +1401,7 @@ public class HRegion implements HeapSize
         stores.size());
     try {
       sequenceId = (wal == null)? myseqid :
-        wal.startCacheFlush(this.regionInfo.getRegionName());
+        wal.getStartCacheFlushSeqNum(this.regionInfo.getRegionName());
       completeSequenceId = this.getCompleteCacheFlushSequenceId(sequenceId);
       for (Store s : stores.values()) {
         storeFlushers.add(s.getStoreFlusher(completeSequenceId));

Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLog.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLog.java?rev=1497992&r1=1497991&r2=1497992&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLog.java (original)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLog.java Sat Jun 29 18:18:46 2013
@@ -1335,13 +1335,19 @@ public class HLog implements Syncable {
     return outputfiles.size();
   }
 
+
   /**
-   * By acquiring a log sequence ID, we can allow log messages to continue while
-   * we flush the cache.
-   *
    * Acquire a lock so that we do not roll the log between the start and
    * completion of a cache-flush. Otherwise the log-seq-id for the flush will
    * not appear in the correct logfile.
+   */
+  public void startCacheFlush() {
+    this.cacheFlushLock.readLock().lock();
+  }
+
+  /**
+   * By acquiring a log sequence ID, we can allow log messages to continue while
+   * we flush the cache.
    *
    * In this method, by removing the entry in firstSeqWritten for the region
    * being flushed we ensure that the next edit inserted in this region will be
@@ -1354,8 +1360,7 @@ public class HLog implements Syncable {
    * @see #completeCacheFlush(byte[], byte[], long, boolean)
    * @see #abortCacheFlush()
    */
-  public long startCacheFlush(final byte [] regionName) {
-    this.cacheFlushLock.readLock().lock();
+  public long getStartCacheFlushSeqNum(final byte [] regionName) {
     if (this.firstSeqWrittenInSnapshotMemstore.containsKey(regionName)) {
       LOG.warn("Requested a startCacheFlush while firstSeqWrittenInSnapshotMemstore still"
           + " contains " + Bytes.toString(regionName) + " . Did the previous flush fail?"

Modified: hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestHLog.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestHLog.java?rev=1497992&r1=1497991&r2=1497992&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestHLog.java (original)
+++ hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestHLog.java Sat Jun 29 18:18:46 2013
@@ -461,7 +461,8 @@ public class TestHLog  {
         row,Bytes.toBytes(Bytes.toString(row) + "1"), false);
       final byte [] regionName = info.getRegionName();
       log.append(info, tableName, cols, System.currentTimeMillis());
-      long logSeqId = log.startCacheFlush(info.getRegionName());
+      log.startCacheFlush();
+      long logSeqId = log.getStartCacheFlushSeqNum(info.getRegionName());
       log.completeCacheFlush(regionName, tableName, logSeqId, info.isMetaRegion());
       log.close();
       Path filename = log.computeFilename();
@@ -519,7 +520,8 @@ public class TestHLog  {
       HRegionInfo hri = new HRegionInfo(new HTableDescriptor(tableName),
           HConstants.EMPTY_START_ROW, HConstants.EMPTY_END_ROW);
       log.append(hri, tableName, cols, System.currentTimeMillis());
-      long logSeqId = log.startCacheFlush(hri.getRegionName());
+      log.startCacheFlush();
+      long logSeqId = log.getStartCacheFlushSeqNum(hri.getRegionName());
       log.completeCacheFlush(hri.getRegionName(), tableName, logSeqId, false);
       log.close();
       Path filename = log.computeFilename();
@@ -583,7 +585,8 @@ public class TestHLog  {
     // Flush the first region, we expect to see the first two files getting
     // archived
     addEdits(log, hri, tableName, 1);
-    long seqId = log.startCacheFlush(hri.getRegionName());
+    log.startCacheFlush();
+    long seqId = log.getStartCacheFlushSeqNum(hri.getRegionName());
     log.completeCacheFlush(hri.getRegionName(), tableName, seqId, false);
     log.rollWriter();
     assertEquals(2, log.getNumLogFiles());
@@ -591,7 +594,8 @@ public class TestHLog  {
     // Flush the second region, which removes all the remaining output files
     // since the oldest was completely flushed.
     addEdits(log, hri2, tableName2, 1);
-    seqId = log.startCacheFlush(hri2.getRegionName());
+    log.startCacheFlush();
+    seqId = log.getStartCacheFlushSeqNum(hri2.getRegionName());
     log.completeCacheFlush(hri2.getRegionName(), tableName2, seqId, false);
     log.rollWriter();
     assertEquals(0, log.getNumLogFiles());

Modified: hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestWALReplay.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestWALReplay.java?rev=1497992&r1=1497991&r2=1497992&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestWALReplay.java (original)
+++ hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestWALReplay.java Sat Jun 29 18:18:46 2013
@@ -420,7 +420,8 @@ public class TestWALReplay {
     }
 
     // Add a cache flush, shouldn't have any effect
-    long logSeqId = wal.startCacheFlush(regionName);
+    wal.startCacheFlush();
+    long logSeqId = wal.getStartCacheFlushSeqNum(regionName);
     wal.completeCacheFlush(regionName, tableName, logSeqId, hri.isMetaRegion());
 
     // Add an edit to another family, should be skipped.
@@ -646,4 +647,4 @@ public class TestWALReplay {
     HBaseTestingUtility.setMaxRecoveryErrorCount(wal.getOutputStream(), 1);
     return wal;
   }
-}
\ No newline at end of file
+}