You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by ap...@apache.org on 2017/08/11 20:08:34 UTC

[1/6] hbase git commit: HBASE-18025 CatalogJanitor should collect outdated RegionStates from the AM [Forced Update!]

Repository: hbase
Updated Branches:
  refs/heads/branch-1.4 b5ae0ffca -> aaece0ba5 (forced update)


HBASE-18025 CatalogJanitor should collect outdated RegionStates from the AM


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/aaece0ba
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/aaece0ba
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/aaece0ba

Branch: refs/heads/branch-1.4
Commit: aaece0ba5e399f248c8255fe509cdb1a862bf299
Parents: 3b9c58b
Author: Esteban Gutierrez <es...@apache.org>
Authored: Fri Aug 11 12:56:20 2017 -0500
Committer: Andrew Purtell <ap...@apache.org>
Committed: Fri Aug 11 13:08:28 2017 -0700

----------------------------------------------------------------------
 .../hadoop/hbase/master/CatalogJanitor.java     |   4 +
 .../hadoop/hbase/master/RegionStates.java       |   6 +
 .../hadoop/hbase/master/ServerManager.java      |   7 +
 .../TestCatalogJanitorInMemoryStates.java       | 188 +++++++++++++++++++
 4 files changed, 205 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/aaece0ba/hbase-server/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java
index 17644eb..00dc4a5 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java
@@ -217,6 +217,8 @@ public class CatalogJanitor extends ScheduledChore {
       HFileArchiver.archiveRegion(this.services.getConfiguration(), fs, regionA);
       HFileArchiver.archiveRegion(this.services.getConfiguration(), fs, regionB);
       MetaTableAccessor.deleteMergeQualifiers(services.getConnection(), mergedRegion);
+      services.getAssignmentManager().getRegionStates().deleteRegion(regionA);
+      services.getAssignmentManager().getRegionStates().deleteRegion(regionB);
       services.getServerManager().removeRegion(regionA);
       services.getServerManager().removeRegion(regionB);
       return true;
@@ -361,6 +363,8 @@ public class CatalogJanitor extends ScheduledChore {
       if (LOG.isTraceEnabled()) LOG.trace("Archiving parent region: " + parent);
       HFileArchiver.archiveRegion(this.services.getConfiguration(), fs, parent);
       MetaTableAccessor.deleteRegion(this.connection, parent);
+      if (services.getAssignmentManager().getRegionStates() != null)
+        services.getAssignmentManager().getRegionStates().deleteRegion(parent);
       services.getServerManager().removeRegion(parent);
       result = true;
     }

http://git-wip-us.apache.org/repos/asf/hbase/blob/aaece0ba/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java
index 082b5cc..599e649 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java
@@ -899,6 +899,12 @@ public class RegionStates {
     }
   }
 
+  @VisibleForTesting
+  public boolean isRegionInRegionStates(final HRegionInfo hri) {
+    return (getRegionState(hri) != null || isRegionOnline(hri)) || isRegionInTransition(hri)
+        || isRegionInState(hri, State.OFFLINE, State.CLOSED);
+     }
+
   /**
    * Checking if a region was assigned to a server which is not online now.
    * If so, we should hold re-assign this region till SSH has split its wals.

http://git-wip-us.apache.org/repos/asf/hbase/blob/aaece0ba/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
index 93e532b..040342f 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
@@ -1311,6 +1311,13 @@ public class ServerManager {
     flushedSequenceIdByRegion.remove(encodedName);
   }
 
+  @VisibleForTesting
+  public boolean isRegionInServerManagerStates(final HRegionInfo hri) {
+    final byte[] encodedName = hri.getEncodedNameAsBytes();
+    return (storeFlushedSequenceIdsByRegion.containsKey(encodedName)
+        || flushedSequenceIdByRegion.containsKey(encodedName));
+  }
+
   /**
    * Called by delete table and similar to notify the ServerManager that a region was removed.
    */

http://git-wip-us.apache.org/repos/asf/hbase/blob/aaece0ba/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestCatalogJanitorInMemoryStates.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestCatalogJanitorInMemoryStates.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestCatalogJanitorInMemoryStates.java
new file mode 100644
index 0000000..d2bed9b
--- /dev/null
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestCatalogJanitorInMemoryStates.java
@@ -0,0 +1,188 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.master;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.*;
+import org.apache.hadoop.hbase.client.*;
+import org.apache.hadoop.hbase.master.AssignmentManager;
+import org.apache.hadoop.hbase.regionserver.HRegion;
+import org.apache.hadoop.hbase.testclassification.MasterTests;
+import org.apache.hadoop.hbase.testclassification.MediumTests;
+import org.apache.hadoop.hbase.testclassification.SmallTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.PairOfSameType;
+import org.apache.hadoop.hbase.util.Threads;
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+import org.junit.rules.TestName;
+import org.junit.rules.TestRule;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.assertNotNull;
+
+@Category({MasterTests.class, MediumTests.class})
+public class TestCatalogJanitorInMemoryStates {
+  private static final Log LOG = LogFactory.getLog(TestCatalogJanitorInMemoryStates.class);
+  @Rule public final TestRule timeout = CategoryBasedTimeout.builder().
+     withTimeout(this.getClass()).withLookingForStuckThread(true).build();
+  @Rule public final TestName name = new TestName();
+  protected final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
+  private static byte [] ROW = Bytes.toBytes("testRow");
+  private static byte [] FAMILY = Bytes.toBytes("testFamily");
+  private static byte [] QUALIFIER = Bytes.toBytes("testQualifier");
+  private static byte [] VALUE = Bytes.toBytes("testValue");
+
+  /**
+   * @throws java.lang.Exception
+   */
+  @BeforeClass
+  public static void setUpBeforeClass() throws Exception {
+    Configuration conf = TEST_UTIL.getConfiguration();
+    TEST_UTIL.startMiniCluster(1);
+  }
+
+  /**
+   * @throws java.lang.Exception
+   */
+  @AfterClass
+  public static void tearDownAfterClass() throws Exception {
+    TEST_UTIL.shutdownMiniCluster();
+  }
+
+  /**
+   * Test clearing a split parent from memory.
+   */
+  @Test(timeout = 180000)
+  public void testInMemoryParentCleanup() throws IOException, InterruptedException {
+    final AssignmentManager am = TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager();
+    final ServerManager sm = TEST_UTIL.getHBaseCluster().getMaster().getServerManager();
+    final CatalogJanitor janitor = TEST_UTIL.getHBaseCluster().getMaster().catalogJanitorChore;
+
+    Admin admin = TEST_UTIL.getHBaseAdmin();
+    admin.enableCatalogJanitor(false);
+
+    final TableName tableName = TableName.valueOf(name.getMethodName());
+    Table t = TEST_UTIL.createTable(tableName, FAMILY);
+    int rowCount = TEST_UTIL.loadTable(t, FAMILY, false);
+
+    RegionLocator locator = TEST_UTIL.getConnection().getRegionLocator(tableName);
+    List<HRegionLocation> allRegionLocations = locator.getAllRegionLocations();
+
+    // We need to create a valid split with daughter regions
+    HRegionLocation parent = allRegionLocations.get(0);
+    List<HRegionLocation> daughters = splitRegion(parent.getRegionInfo());
+    LOG.info("Parent region: " + parent);
+    LOG.info("Daughter regions: " + daughters);
+    assertNotNull("Should have found daughter regions for " + parent, daughters);
+
+    assertTrue("Parent region should exist in RegionStates",
+        am.getRegionStates().isRegionInRegionStates(parent.getRegionInfo()));
+    assertTrue("Parent region should exist in ServerManager",
+        sm.isRegionInServerManagerStates(parent.getRegionInfo()));
+
+    // clean the parent
+    Result r = MetaMockingUtil.getMetaTableRowResult(parent.getRegionInfo(), null,
+        daughters.get(0).getRegionInfo(), daughters.get(1).getRegionInfo());
+    // We need to wait a little before and after we remove the parent
+    Thread.sleep(5000);
+    janitor.cleanParent(parent.getRegionInfo(), r);
+    Thread.sleep(5000);
+    assertFalse("Parent region should have been removed from RegionStates",
+        am.getRegionStates().isRegionInRegionStates(parent.getRegionInfo()));
+    assertFalse("Parent region should have been removed from ServerManager",
+        sm.isRegionInServerManagerStates(parent.getRegionInfo()));
+
+  }
+
+  /*
+ * Splits a region
+ * @param t Region to split.
+ * @return List of region locations
+ * @throws IOException, InterruptedException
+ */
+  private List<HRegionLocation> splitRegion(final HRegionInfo r)
+      throws IOException, InterruptedException {
+    List<HRegionLocation> locations = new ArrayList<>();
+    // Split this table in two.
+    Admin admin = TEST_UTIL.getHBaseAdmin();
+    Connection connection = TEST_UTIL.getConnection();
+    admin.splitRegion(r.getEncodedNameAsBytes());
+    admin.close();
+    PairOfSameType<HRegionInfo> regions = waitOnDaughters(r);
+    if (regions != null) {
+      try (RegionLocator rl = connection.getRegionLocator(r.getTable())) {
+        locations.add(rl.getRegionLocation(regions.getFirst().getEncodedNameAsBytes()));
+        locations.add(rl.getRegionLocation(regions.getSecond().getEncodedNameAsBytes()));
+      }
+      return locations;
+    }
+    return locations;
+  }
+
+  /*
+   * Wait on region split. May return because we waited long enough on the split
+   * and it didn't happen.  Caller should check.
+   * @param r
+   * @return Daughter regions; caller needs to check table actually split.
+   */
+  private PairOfSameType<HRegionInfo> waitOnDaughters(final HRegionInfo r)
+      throws IOException {
+    long start = System.currentTimeMillis();
+    PairOfSameType<HRegionInfo> pair = null;
+    try (Connection conn = ConnectionFactory.createConnection(TEST_UTIL.getConfiguration());
+         Table metaTable = conn.getTable(TableName.META_TABLE_NAME)) {
+      Result result = null;
+      HRegionInfo region = null;
+      while ((System.currentTimeMillis() - start) < 60000) {
+        result = metaTable.get(new Get(r.getRegionName()));
+        if (result == null) {
+          break;
+        }
+        region = MetaTableAccessor.getHRegionInfo(result);
+        if (region.isSplitParent()) {
+          LOG.debug(region.toString() + " IS a parent!");
+          pair = MetaTableAccessor.getDaughterRegions(result);
+          break;
+        }
+        Threads.sleep(100);
+      }
+
+      if (pair.getFirst() == null || pair.getSecond() == null) {
+        throw new IOException("Failed to get daughters, for parent region: " + r);
+      }
+      return pair;
+    }
+  }
+}


[6/6] hbase git commit: HBASE-18024 HRegion#initializeRegionInternals should not re-create .hregioninfo file when the region directory no longer exists

Posted by ap...@apache.org.
HBASE-18024 HRegion#initializeRegionInternals should not re-create .hregioninfo file when the region directory no longer exists


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/8ca1bf96
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/8ca1bf96
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/8ca1bf96

Branch: refs/heads/branch-1.4
Commit: 8ca1bf96f4b3ca5ad3ed05e6f5ed2a2ad30d8bbb
Parents: e894e87
Author: Esteban Gutierrez <es...@apache.org>
Authored: Fri Jul 21 13:13:00 2017 -0500
Committer: Andrew Purtell <ap...@apache.org>
Committed: Fri Aug 11 13:08:28 2017 -0700

----------------------------------------------------------------------
 .../hadoop/hbase/regionserver/HRegion.java      | 11 +++-
 .../hbase/regionserver/HRegionFileSystem.java   | 31 +++++++++--
 .../hadoop/hbase/regionserver/TestHRegion.java  |  7 ++-
 .../hbase/regionserver/TestRegionOpen.java      | 56 +++++++++++++++++++-
 .../TestStoreFileRefresherChore.java            |  2 +
 5 files changed, 99 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/8ca1bf96/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
index dfb7b71..1fac683 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
@@ -902,8 +902,15 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi
     }
 
     // Write HRI to a file in case we need to recover hbase:meta
-    status.setStatus("Writing region info on filesystem");
-    fs.checkRegionInfoOnFilesystem();
+    // Only the primary replica should write .regioninfo
+    if (this.getRegionInfo().getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID) {
+      status.setStatus("Writing region info on filesystem");
+      fs.checkRegionInfoOnFilesystem();
+    } else {
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("Skipping creation of .regioninfo file for " + this.getRegionInfo());
+      }
+    }
 
     // Initialize all the HStores
     status.setStatus("Initializing all the Stores");

http://git-wip-us.apache.org/repos/asf/hbase/blob/8ca1bf96/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionFileSystem.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionFileSystem.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionFileSystem.java
index 33c03ca..3a0b30a 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionFileSystem.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionFileSystem.java
@@ -858,9 +858,19 @@ public class HRegionFileSystem {
     // only should be sufficient. I don't want to read the file every time to check if it pb
     // serialized.
     byte[] content = getRegionInfoFileContent(regionInfoForFs);
+
+    // Verify if the region directory exists before opening a region. We need to do this since if
+    // the region directory doesn't exist we will re-create the region directory and a new HRI
+    // when HRegion.openHRegion() is called.
     try {
-      Path regionInfoFile = new Path(getRegionDir(), REGION_INFO_FILE);
+      FileStatus status = fs.getFileStatus(getRegionDir());
+    } catch (FileNotFoundException e) {
+      LOG.warn(getRegionDir() + " doesn't exist for region: " + regionInfoForFs.getEncodedName() +
+          " on table " + regionInfo.getTable());
+    }
 
+    try {
+      Path regionInfoFile = new Path(getRegionDir(), REGION_INFO_FILE);
       FileStatus status = fs.getFileStatus(regionInfoFile);
       if (status != null && status.getLen() == content.length) {
         // Then assume the content good and move on.
@@ -953,7 +963,13 @@ public class HRegionFileSystem {
     }
 
     // Write HRI to a file in case we need to recover hbase:meta
-    regionFs.writeRegionInfoOnFilesystem(false);
+    // Only primary replicas should write region info
+    if (regionInfo.getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID) {
+      regionFs.writeRegionInfoOnFilesystem(false);
+    } else {
+      if (LOG.isDebugEnabled())
+        LOG.debug("Skipping creation of .regioninfo file for " + regionInfo);
+    }
     return regionFs;
   }
 
@@ -983,8 +999,15 @@ public class HRegionFileSystem {
       regionFs.cleanupSplitsDir();
       regionFs.cleanupMergesDir();
 
-      // if it doesn't exists, Write HRI to a file, in case we need to recover hbase:meta
-      regionFs.checkRegionInfoOnFilesystem();
+      // If it doesn't exists, Write HRI to a file, in case we need to recover hbase:meta
+      // Only create HRI if we are the default replica
+      if (regionInfo.getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID) {
+        regionFs.checkRegionInfoOnFilesystem();
+      } else {
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("Skipping creation of .regioninfo file for " + regionInfo);
+        }
+      }
     }
 
     return regionFs;

http://git-wip-us.apache.org/repos/asf/hbase/blob/8ca1bf96/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java
index 03a1926..1ec2fe4 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java
@@ -6283,6 +6283,10 @@ public class TestHRegion {
 
   @Test
   public void testCloseRegionWrittenToWAL() throws Exception {
+
+    Path rootDir = new Path(dir + name.getMethodName());
+    FSUtils.setRootDir(TEST_UTIL.getConfiguration(), rootDir);
+
     final ServerName serverName = ServerName.valueOf("testCloseRegionWrittenToWAL", 100, 42);
     final RegionServerServices rss = spy(TEST_UTIL.createMockRegionServerService(serverName));
 
@@ -6301,7 +6305,8 @@ public class TestHRegion {
     when(rss.getWAL((HRegionInfo) any())).thenReturn(wal);
 
 
-    // open a region first so that it can be closed later
+    // create and then open a region first so that it can be closed later
+    region = HRegion.createHRegion(hri, rootDir, TEST_UTIL.getConfiguration(), htd, rss.getWAL(hri));
     region = HRegion.openHRegion(hri, htd, rss.getWAL(hri),
       TEST_UTIL.getConfiguration(), rss, null);
 

http://git-wip-us.apache.org/repos/asf/hbase/blob/8ca1bf96/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionOpen.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionOpen.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionOpen.java
index 7889504..3ecd94a 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionOpen.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionOpen.java
@@ -19,26 +19,39 @@
 package org.apache.hadoop.hbase.regionserver;
 
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
 
+
+import java.io.IOException;
+import java.util.List;
 import java.util.concurrent.ThreadPoolExecutor;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.HBaseTestingUtility;
 import org.apache.hadoop.hbase.HColumnDescriptor;
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.HTableDescriptor;
 import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.HRegionInfo;
 import org.apache.hadoop.hbase.client.ConnectionFactory;
 import org.apache.hadoop.hbase.executor.ExecutorType;
 import org.apache.hadoop.hbase.testclassification.MediumTests;
 import org.apache.hadoop.hbase.testclassification.RegionServerTests;
 import org.apache.hadoop.hbase.client.Admin;
 import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.FSUtils;
 import org.junit.AfterClass;
 import org.junit.BeforeClass;
+import org.junit.Rule;
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
+import org.junit.rules.TestName;
+import static org.junit.Assert.fail;
 
 @Category({MediumTests.class, RegionServerTests.class})
 public class TestRegionOpen {
@@ -47,7 +60,9 @@ public class TestRegionOpen {
   private static final int NB_SERVERS = 1;
 
   private static final HBaseTestingUtility HTU = new HBaseTestingUtility();
-  final TableName tableName = TableName.valueOf(TestRegionOpen.class.getSimpleName());
+
+  @Rule
+  public TestName name = new TestName();
 
   @BeforeClass
   public static void before() throws Exception {
@@ -68,6 +83,7 @@ public class TestRegionOpen {
 
   @Test(timeout = 60000)
   public void testPriorityRegionIsOpenedWithSeparateThreadPool() throws Exception {
+    final TableName tableName = TableName.valueOf(TestRegionOpen.class.getSimpleName());
     ThreadPoolExecutor exec = getRS().getExecutorService()
         .getExecutorThreadPool(ExecutorType.RS_OPEN_PRIORITY_REGION);
     assertEquals(1, exec.getCompletedTaskCount()); // namespace region
@@ -82,4 +98,42 @@ public class TestRegionOpen {
 
     assertEquals(2, exec.getCompletedTaskCount());
   }
+
+  @Test(timeout = 60000)
+  public void testNonExistentRegionReplica() throws Exception {
+    final TableName tableName = TableName.valueOf(name.getMethodName());
+    final byte[] FAMILYNAME = Bytes.toBytes("fam");
+    FileSystem fs = HTU.getTestFileSystem();
+    Connection connection = HTU.getConnection();
+    Admin admin = connection.getAdmin();
+    Configuration conf = HTU.getConfiguration();
+    Path rootDir = HTU.getDataTestDirOnTestFS();
+
+    HTableDescriptor htd = new HTableDescriptor(tableName);
+    htd.addFamily(new HColumnDescriptor(FAMILYNAME));
+    admin.createTable(htd);
+    HTU.waitUntilNoRegionsInTransition(60000);
+
+    // Create new HRI with non-default region replica id
+    HRegionInfo hri = new HRegionInfo(htd.getTableName(),  Bytes.toBytes("A"), Bytes.toBytes("B"), false,
+        System.currentTimeMillis(), 2);
+    HRegionFileSystem regionFs = HRegionFileSystem.createRegionOnFileSystem(conf, fs,
+        FSUtils.getTableDir(rootDir, hri.getTable()), hri);
+    Path regionDir = regionFs.getRegionDir();
+    try {
+      HRegionFileSystem.loadRegionInfoFileContent(fs, regionDir);
+    } catch (IOException e) {
+      LOG.info("Caught expected IOE due missing .regioninfo file, due: " + e.getMessage() + " skipping region open.");
+      // We should only have 1 region online
+      List<HRegionInfo> regions = admin.getTableRegions(tableName);
+      LOG.info("Regions: " + regions);
+      if (regions.size() != 1) {
+        fail("Table " + tableName + " should have only one region, but got more: " + regions);
+      }
+      return;
+    } finally {
+      admin.close();
+    }
+    fail("Should have thrown IOE when attempting to open a non-existing region.");
+  }
 }

http://git-wip-us.apache.org/repos/asf/hbase/blob/8ca1bf96/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreFileRefresherChore.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreFileRefresherChore.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreFileRefresherChore.java
index ab9236d..455be4a 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreFileRefresherChore.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreFileRefresherChore.java
@@ -169,6 +169,8 @@ public class TestStoreFileRefresherChore {
     when(regionServer.getConfiguration()).thenReturn(TEST_UTIL.getConfiguration());
 
     HTableDescriptor htd = getTableDesc(TableName.valueOf("testIsStale"), families);
+    htd.setRegionReplication(2);
+
     Region primary = initHRegion(htd, HConstants.EMPTY_START_ROW, HConstants.EMPTY_END_ROW, 0);
     Region replica1 = initHRegion(htd, HConstants.EMPTY_START_ROW, HConstants.EMPTY_END_ROW, 1);
     regions.add(primary);


[2/6] hbase git commit: HBASE-18479 should apply HBASE-18255 to HBASE_MASTER_OPTS too

Posted by ap...@apache.org.
HBASE-18479 should apply HBASE-18255 to HBASE_MASTER_OPTS too

Signed-off-by: tedyu <yu...@gmail.com>


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/1ce51032
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/1ce51032
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/1ce51032

Branch: refs/heads/branch-1.4
Commit: 1ce510322b712f3f5b6c2239a85f6df5dffcac91
Parents: 8ca1bf9
Author: chenyechao <ch...@cmss.chinamobile.com>
Authored: Sun Jul 30 14:07:38 2017 +0800
Committer: Andrew Purtell <ap...@apache.org>
Committed: Fri Aug 11 13:08:28 2017 -0700

----------------------------------------------------------------------
 conf/hbase-env.cmd | 2 +-
 conf/hbase-env.sh  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/1ce51032/conf/hbase-env.cmd
----------------------------------------------------------------------
diff --git a/conf/hbase-env.cmd b/conf/hbase-env.cmd
index 2ffadbf..b10e934 100644
--- a/conf/hbase-env.cmd
+++ b/conf/hbase-env.cmd
@@ -43,7 +43,7 @@
 set HBASE_OPTS="-XX:+UseConcMarkSweepGC" "-Djava.net.preferIPv4Stack=true"
 
 @rem Configure PermSize. Only needed in JDK7. You can safely remove it for JDK8+
-set HBASE_MASTER_OPTS=%HBASE_MASTER_OPTS% "-XX:PermSize=128m" "-XX:MaxPermSize=128m"
+set HBASE_MASTER_OPTS=%HBASE_MASTER_OPTS% "-XX:PermSize=128m" "-XX:MaxPermSize=128m" "-XX:ReservedCodeCacheSize=256m"
 set HBASE_REGIONSERVER_OPTS=%HBASE_REGIONSERVER_OPTS% "-XX:PermSize=128m" "-XX:MaxPermSize=128m" "-XX:ReservedCodeCacheSize=256m"
 
 @rem Uncomment below to enable java garbage collection logging for the server-side processes

http://git-wip-us.apache.org/repos/asf/hbase/blob/1ce51032/conf/hbase-env.sh
----------------------------------------------------------------------
diff --git a/conf/hbase-env.sh b/conf/hbase-env.sh
index 599a2f1..f2195da 100644
--- a/conf/hbase-env.sh
+++ b/conf/hbase-env.sh
@@ -43,7 +43,7 @@
 export HBASE_OPTS="-XX:+UseConcMarkSweepGC"
 
 # Configure PermSize. Only needed in JDK7. You can safely remove it for JDK8+
-export HBASE_MASTER_OPTS="$HBASE_MASTER_OPTS -XX:PermSize=128m -XX:MaxPermSize=128m"
+export HBASE_MASTER_OPTS="$HBASE_MASTER_OPTS -XX:PermSize=128m -XX:MaxPermSize=128m -XX:ReservedCodeCacheSize=256m"
 export HBASE_REGIONSERVER_OPTS="$HBASE_REGIONSERVER_OPTS -XX:PermSize=128m -XX:MaxPermSize=128m -XX:ReservedCodeCacheSize=256m"
 
 # Uncomment one of the below three options to enable java garbage collection logging for the server-side processes.


[5/6] hbase git commit: HBASE-18197 Avoided to call job.waitForCompletion(true) two times

Posted by ap...@apache.org.
HBASE-18197 Avoided to call job.waitForCompletion(true) two times

Signed-off-by: Chia-Ping Tsai <ch...@gmail.com>


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/3b9c58be
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/3b9c58be
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/3b9c58be

Branch: refs/heads/branch-1.4
Commit: 3b9c58be46e6eec4993650b27e667e5cda11742a
Parents: 1ce5103
Author: Jan Hentschel <ja...@ultratendency.com>
Authored: Sat Jun 10 22:17:00 2017 +0200
Committer: Andrew Purtell <ap...@apache.org>
Committed: Fri Aug 11 13:08:28 2017 -0700

----------------------------------------------------------------------
 .../src/main/java/org/apache/hadoop/hbase/mapreduce/Import.java    | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/3b9c58be/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/Import.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/Import.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/Import.java
index d315b81..8a80d15 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/Import.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/Import.java
@@ -750,6 +750,6 @@ public class Import {
       }
     }
 
-    System.exit(job.waitForCompletion(true) ? 0 : 1);
+    System.exit(isJobSuccessful ? 0 : 1);
   }
 }


[3/6] hbase git commit: HBASE-18248 Warn if monitored RPC task has been tied up beyond a configurable threshold

Posted by ap...@apache.org.
HBASE-18248 Warn if monitored RPC task has been tied up beyond a configurable threshold


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/a048e6ed
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/a048e6ed
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/a048e6ed

Branch: refs/heads/branch-1.4
Commit: a048e6ed53b19573499a9ef81a531ed2e8f6b0dc
Parents: 8d826b8
Author: Andrew Purtell <ap...@apache.org>
Authored: Wed Aug 9 18:11:28 2017 -0700
Committer: Andrew Purtell <ap...@apache.org>
Committed: Fri Aug 11 13:08:28 2017 -0700

----------------------------------------------------------------------
 .../monitoring/MonitoredRPCHandlerImpl.java     |  8 +-
 .../hadoop/hbase/monitoring/MonitoredTask.java  |  2 +
 .../hbase/monitoring/MonitoredTaskImpl.java     | 16 +++-
 .../hadoop/hbase/monitoring/TaskMonitor.java    | 88 +++++++++++++++++---
 .../hbase/monitoring/TestTaskMonitor.java       | 44 +++++++---
 5 files changed, 130 insertions(+), 28 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/a048e6ed/hbase-server/src/main/java/org/apache/hadoop/hbase/monitoring/MonitoredRPCHandlerImpl.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/monitoring/MonitoredRPCHandlerImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/monitoring/MonitoredRPCHandlerImpl.java
index a29595b..08c8c9f 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/monitoring/MonitoredRPCHandlerImpl.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/monitoring/MonitoredRPCHandlerImpl.java
@@ -251,6 +251,12 @@ public class MonitoredRPCHandlerImpl extends MonitoredTaskImpl
     if (getState() != State.RUNNING) {
       return super.toString();
     }
-    return super.toString() + ", rpcMethod=" + getRPC();
+    return super.toString()
+        + ", queuetimems=" + getRPCQueueTime()
+        + ", starttimems=" + getRPCStartTime()
+        + ", clientaddress=" + clientAddress
+        + ", remoteport=" + remotePort
+        + ", packetlength=" + getRPCPacketLength()
+        + ", rpcMethod=" + getRPC();
   }
 }

http://git-wip-us.apache.org/repos/asf/hbase/blob/a048e6ed/hbase-server/src/main/java/org/apache/hadoop/hbase/monitoring/MonitoredTask.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/monitoring/MonitoredTask.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/monitoring/MonitoredTask.java
index ff3667b..48fba1b 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/monitoring/MonitoredTask.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/monitoring/MonitoredTask.java
@@ -39,6 +39,7 @@ public interface MonitoredTask extends Cloneable {
   State getState();
   long getStateTime();
   long getCompletionTimestamp();
+  long getWarnTime();
 
   void markComplete(String msg);
   void pause(String msg);
@@ -48,6 +49,7 @@ public interface MonitoredTask extends Cloneable {
 
   void setStatus(String status);
   void setDescription(String description);
+  void setWarnTime(final long t);
 
   /**
    * Explicitly mark this status as able to be cleaned up,

http://git-wip-us.apache.org/repos/asf/hbase/blob/a048e6ed/hbase-server/src/main/java/org/apache/hadoop/hbase/monitoring/MonitoredTaskImpl.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/monitoring/MonitoredTaskImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/monitoring/MonitoredTaskImpl.java
index 27aaceb..0cee4c8 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/monitoring/MonitoredTaskImpl.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/monitoring/MonitoredTaskImpl.java
@@ -30,7 +30,8 @@ class MonitoredTaskImpl implements MonitoredTask {
   private long startTime;
   private long statusTime;
   private long stateTime;
-  
+  private long warnTime;
+
   private volatile String status;
   private volatile String description;
   
@@ -42,6 +43,7 @@ class MonitoredTaskImpl implements MonitoredTask {
     startTime = System.currentTimeMillis();
     statusTime = startTime;
     stateTime = startTime;
+    warnTime = startTime;
   }
 
   @Override
@@ -82,7 +84,12 @@ class MonitoredTaskImpl implements MonitoredTask {
   public long getStateTime() {
     return stateTime;
   }
-  
+
+  @Override
+  public long getWarnTime() {
+    return warnTime;
+  }
+
   @Override
   public long getCompletionTimestamp() {
     if (state == State.COMPLETE || state == State.ABORTED) {
@@ -132,6 +139,11 @@ class MonitoredTaskImpl implements MonitoredTask {
   }
 
   @Override
+  public void setWarnTime(long t) {
+    this.warnTime = t;
+  }
+
+  @Override
   public void cleanup() {
     if (state == State.RUNNING) {
       setState(State.ABORTED);

http://git-wip-us.apache.org/repos/asf/hbase/blob/a048e6ed/hbase-server/src/main/java/org/apache/hadoop/hbase/monitoring/TaskMonitor.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/monitoring/TaskMonitor.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/monitoring/TaskMonitor.java
index 53db6a9..0f91234 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/monitoring/TaskMonitor.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/monitoring/TaskMonitor.java
@@ -30,9 +30,12 @@ import java.util.List;
 import org.apache.commons.collections.buffer.CircularFifoBuffer;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HBaseConfiguration;
 import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
+import org.apache.hadoop.hbase.util.Threads;
 
-import com.google.common.annotations.VisibleForTesting;
 import com.google.common.collect.Lists;
 
 /**
@@ -44,16 +47,35 @@ import com.google.common.collect.Lists;
 public class TaskMonitor {
   private static final Log LOG = LogFactory.getLog(TaskMonitor.class);
 
-  // Don't keep around any tasks that have completed more than
-  // 60 seconds ago
-  private static final long EXPIRATION_TIME = 60*1000;
+  public static final String MAX_TASKS_KEY = "hbase.taskmonitor.max.tasks";
+  public static final int DEFAULT_MAX_TASKS = 1000;
+  public static final String RPC_WARN_TIME_KEY = "hbase.taskmonitor.rpc.warn.time";
+  public static final long DEFAULT_RPC_WARN_TIME = 0;
+  public static final String EXPIRATION_TIME_KEY = "hbase.taskmonitor.expiration.time";
+  public static final long DEFAULT_EXPIRATION_TIME = 60*1000;
+  public static final String MONITOR_INTERVAL_KEY = "hbase.taskmonitor.monitor.interval";
+  public static final long DEFAULT_MONITOR_INTERVAL = 10*1000;
 
-  @VisibleForTesting
-  static final int MAX_TASKS = 1000;
-  
   private static TaskMonitor instance;
-  private CircularFifoBuffer tasks = new CircularFifoBuffer(MAX_TASKS);
-  private List<TaskAndWeakRefPair> rpcTasks = Lists.newArrayList();
+
+  private final int maxTasks;
+  private final long rpcWarnTime;
+  private final long expirationTime;
+  private final CircularFifoBuffer tasks;
+  private final List<TaskAndWeakRefPair> rpcTasks;
+  private final long monitorInterval;
+  private Thread monitorThread;
+
+  TaskMonitor(Configuration conf) {
+    maxTasks = conf.getInt(MAX_TASKS_KEY, DEFAULT_MAX_TASKS);
+    expirationTime = conf.getLong(EXPIRATION_TIME_KEY, DEFAULT_EXPIRATION_TIME);
+    rpcWarnTime = conf.getLong(RPC_WARN_TIME_KEY, DEFAULT_RPC_WARN_TIME);
+    tasks = new CircularFifoBuffer(maxTasks);
+    rpcTasks = Lists.newArrayList();
+    monitorInterval = conf.getLong(MONITOR_INTERVAL_KEY, DEFAULT_MONITOR_INTERVAL);
+    monitorThread = new Thread(new MonitorRunnable());
+    Threads.setDaemonThreadRunning(monitorThread, "Monitor thread for TaskMonitor");
+  }
 
   /**
    * Get singleton instance.
@@ -61,7 +83,7 @@ public class TaskMonitor {
    */
   public static synchronized TaskMonitor get() {
     if (instance == null) {
-      instance = new TaskMonitor();
+      instance = new TaskMonitor(HBaseConfiguration.create());
     }
     return instance;
   }
@@ -93,6 +115,22 @@ public class TaskMonitor {
     return proxy;
   }
 
+  private synchronized void warnStuckTasks() {
+    if (rpcWarnTime > 0) {
+      final long now = EnvironmentEdgeManager.currentTime();
+      for (Iterator<TaskAndWeakRefPair> it = rpcTasks.iterator();
+          it.hasNext();) {
+        TaskAndWeakRefPair pair = it.next();
+        MonitoredTask stat = pair.get();
+        if ((stat.getState() == MonitoredTaskImpl.State.RUNNING) &&
+            (now >= stat.getWarnTime() + rpcWarnTime)) {
+          LOG.warn("Task may be stuck: " + stat);
+          stat.setWarnTime(now);
+        }
+      }
+    }
+  }
+
   private synchronized void purgeExpiredTasks() {
     for (Iterator<TaskAndWeakRefPair> it = tasks.iterator();
          it.hasNext();) {
@@ -140,12 +178,11 @@ public class TaskMonitor {
 
   private boolean canPurge(MonitoredTask stat) {
     long cts = stat.getCompletionTimestamp();
-    return (cts > 0 && System.currentTimeMillis() - cts > EXPIRATION_TIME);
+    return (cts > 0 && EnvironmentEdgeManager.currentTime() - cts > expirationTime);
   }
-  
 
   public void dumpAsText(PrintWriter out) {
-    long now = System.currentTimeMillis();
+    long now = EnvironmentEdgeManager.currentTime();
     
     List<MonitoredTask> tasks = getTasks();
     for (MonitoredTask task : tasks) {
@@ -165,6 +202,12 @@ public class TaskMonitor {
     }
   }
 
+  public synchronized void shutdown() {
+    if (this.monitorThread != null) {
+      monitorThread.interrupt();
+    }
+  }
+
   /**
    * This class encapsulates an object as well as a weak reference to a proxy
    * that passes through calls to that object. In art form:
@@ -219,4 +262,23 @@ public class TaskMonitor {
       return method.invoke(delegatee, args);
     }    
   }
+
+  private class MonitorRunnable implements Runnable {
+    private boolean running = true;
+
+    @Override
+    public void run() {
+      while (running) {
+        try {
+          Thread.sleep(monitorInterval);
+          if (tasks.isFull()) {
+            purgeExpiredTasks();
+          }
+          warnStuckTasks();
+        } catch (InterruptedException e) {
+          running = false;
+        }
+      }
+    }
+  }
 }

http://git-wip-us.apache.org/repos/asf/hbase/blob/a048e6ed/hbase-server/src/test/java/org/apache/hadoop/hbase/monitoring/TestTaskMonitor.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/monitoring/TestTaskMonitor.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/monitoring/TestTaskMonitor.java
index d09b1d1..0914b85 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/monitoring/TestTaskMonitor.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/monitoring/TestTaskMonitor.java
@@ -22,7 +22,9 @@ import static org.junit.Assert.*;
 
 import java.util.concurrent.atomic.AtomicBoolean;
 
+import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.testclassification.SmallTests;
+import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
 
@@ -31,7 +33,7 @@ public class TestTaskMonitor {
 
   @Test
   public void testTaskMonitorBasics() {
-    TaskMonitor tm = new TaskMonitor();
+    TaskMonitor tm = new TaskMonitor(new Configuration());
     assertTrue("Task monitor should start empty",
         tm.getTasks().isEmpty());
     
@@ -54,11 +56,13 @@ public class TestTaskMonitor {
     // If we mark its completion time back a few minutes, it should get gced
     task.expireNow();
     assertEquals(0, tm.getTasks().size());
+
+    tm.shutdown();
   }
   
   @Test
   public void testTasksGetAbortedOnLeak() throws InterruptedException {
-    final TaskMonitor tm = new TaskMonitor();
+    final TaskMonitor tm = new TaskMonitor(new Configuration());
     assertTrue("Task monitor should start empty",
         tm.getTasks().isEmpty());
     
@@ -85,42 +89,58 @@ public class TestTaskMonitor {
     // Now it should be aborted 
     MonitoredTask taskFromTm = tm.getTasks().get(0);
     assertEquals(MonitoredTask.State.ABORTED, taskFromTm.getState());
+
+    tm.shutdown();
   }
   
   @Test
   public void testTaskLimit() throws Exception {
-    TaskMonitor tm = new TaskMonitor();
-    for (int i = 0; i < TaskMonitor.MAX_TASKS + 10; i++) {
+    TaskMonitor tm = new TaskMonitor(new Configuration());
+    for (int i = 0; i < TaskMonitor.DEFAULT_MAX_TASKS + 10; i++) {
       tm.createStatus("task " + i);
     }
     // Make sure it was limited correctly
-    assertEquals(TaskMonitor.MAX_TASKS, tm.getTasks().size());
+    assertEquals(TaskMonitor.DEFAULT_MAX_TASKS, tm.getTasks().size());
     // Make sure we culled the earlier tasks, not later
     // (i.e. tasks 0 through 9 should have been deleted)
     assertEquals("task 10", tm.getTasks().get(0).getDescription());
+    tm.shutdown();
   }
 
   @Test
   public void testDoNotPurgeRPCTask() throws Exception {
     int RPCTaskNums = 10;
+    TaskMonitor tm = TaskMonitor.get();
     for(int i = 0; i < RPCTaskNums; i++) {
-      TaskMonitor.get().createRPCStatus("PRCTask" + i);
+      tm.createRPCStatus("PRCTask" + i);
     }
-    for(int i = 0; i < TaskMonitor.MAX_TASKS; i++) {
-      TaskMonitor.get().createStatus("otherTask" + i);
+    for(int i = 0; i < TaskMonitor.DEFAULT_MAX_TASKS; i++) {
+      tm.createStatus("otherTask" + i);
     }
     int remainRPCTask = 0;
-    for(MonitoredTask task :TaskMonitor.get().getTasks()) {
+    for(MonitoredTask task: tm.getTasks()) {
       if(task instanceof MonitoredRPCHandler) {
         remainRPCTask++;
       }
     }
     assertEquals("RPC Tasks have been purged!", RPCTaskNums, remainRPCTask);
-
+    tm.shutdown();
   }
 
-
-
+  @Test
+  public void testWarnStuckTasks() throws Exception {
+    final int INTERVAL = 1000;
+    Configuration conf = new Configuration();
+    conf.setLong(TaskMonitor.RPC_WARN_TIME_KEY, INTERVAL);
+    conf.setLong(TaskMonitor.MONITOR_INTERVAL_KEY, INTERVAL);
+    final TaskMonitor tm = new TaskMonitor(conf);
+    MonitoredRPCHandler t = tm.createRPCStatus("test task");
+    long then = EnvironmentEdgeManager.currentTime();
+    t.setRPC("testMethod", new Object[0], then);
+    Thread.sleep(INTERVAL * 2);
+    assertTrue("We did not warn", t.getWarnTime() > then);
+    tm.shutdown();
+  }
 
 }
 


[4/6] hbase git commit: HBASE-18398: Snapshot operation fails with FileNotFoundException

Posted by ap...@apache.org.
HBASE-18398: Snapshot operation fails with FileNotFoundException


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/e894e875
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/e894e875
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/e894e875

Branch: refs/heads/branch-1.4
Commit: e894e8753ce770a73140a3549d0e90d8701f19b1
Parents: a048e6e
Author: Ashu Pachauri <as...@apache.org>
Authored: Mon Aug 7 18:10:33 2017 -0700
Committer: Andrew Purtell <ap...@apache.org>
Committed: Fri Aug 11 13:08:28 2017 -0700

----------------------------------------------------------------------
 .../hadoop/hbase/regionserver/HRegion.java      |  29 ++-
 .../hadoop/hbase/regionserver/HStore.java       |  16 ++
 .../hadoop/hbase/regionserver/Region.java       |   9 +-
 .../snapshot/FlushSnapshotSubprocedure.java     |  31 ++-
 .../hadoop/hbase/snapshot/SnapshotManifest.java |  22 +-
 .../hbase/snapshot/TestRegionSnapshotTask.java  | 205 +++++++++++++++++++
 6 files changed, 289 insertions(+), 23 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/e894e875/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
index d6ad5a4..dfb7b71 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
@@ -8497,11 +8497,12 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi
     case DELETE:
     case BATCH_MUTATE:
     case COMPACT_REGION:
-      // when a region is in recovering state, no read, split or merge is allowed
+    case SNAPSHOT:
+      // when a region is in recovering state, no read, split, merge or snapshot is allowed
       if (isRecovering() && (this.disallowWritesInRecovering ||
-              (op != Operation.PUT && op != Operation.DELETE && op != Operation.BATCH_MUTATE))) {
+          (op != Operation.PUT && op != Operation.DELETE && op != Operation.BATCH_MUTATE))) {
         throw new RegionInRecoveryException(getRegionInfo().getRegionNameAsString() +
-          " is recovering; cannot take reads");
+            " is recovering; cannot take reads");
       }
       break;
     default:
@@ -8521,6 +8522,15 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi
       lock.readLock().unlock();
       throw new NotServingRegionException(getRegionInfo().getRegionNameAsString() + " is closed");
     }
+    // The unit for snapshot is a region. So, all stores for this region must be
+    // prepared for snapshot operation before proceeding.
+    if (op == Operation.SNAPSHOT) {
+      for (Store store : stores.values()) {
+        if (store instanceof HStore) {
+          ((HStore)store).preSnapshotOperation();
+        }
+      }
+    }
     try {
       if (coprocessorHost != null) {
         coprocessorHost.postStartRegionOperation(op);
@@ -8536,12 +8546,15 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi
     closeRegionOperation(Operation.ANY);
   }
 
-  /**
-   * Closes the lock. This needs to be called in the finally block corresponding
-   * to the try block of {@link #startRegionOperation(Operation)}
-   * @throws IOException
-   */
+  @Override
   public void closeRegionOperation(Operation operation) throws IOException {
+    if (operation == Operation.SNAPSHOT) {
+      for (Store store: stores.values()) {
+        if (store instanceof HStore) {
+          ((HStore)store).postSnapshotOperation();
+        }
+      }
+    }
     lock.readLock().unlock();
     if (coprocessorHost != null) {
       coprocessorHost.postCloseRegionOperation(operation);

http://git-wip-us.apache.org/repos/asf/hbase/blob/e894e875/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java
index c211736..de95aeb 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java
@@ -2682,6 +2682,22 @@ public class HStore implements Store {
 	   return getRegionInfo().getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID;
   }
 
+  /**
+   * Sets the store up for a region level snapshot operation.
+   * @see #postSnapshotOperation()
+   */
+  public void preSnapshotOperation() {
+    archiveLock.lock();
+  }
+
+  /**
+   * Perform tasks needed after the completion of snapshot operation.
+   * @see #preSnapshotOperation()
+   */
+  public void postSnapshotOperation() {
+    archiveLock.unlock();
+  }
+
   @Override
   public synchronized void closeAndArchiveCompactedFiles() throws IOException {
     // ensure other threads do not attempt to archive the same files on close()

http://git-wip-us.apache.org/repos/asf/hbase/blob/e894e875/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/Region.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/Region.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/Region.java
index d4d971e..6642220 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/Region.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/Region.java
@@ -221,7 +221,7 @@ public interface Region extends ConfigurationObserver {
    */
   enum Operation {
     ANY, GET, PUT, DELETE, SCAN, APPEND, INCREMENT, SPLIT_REGION, MERGE_REGION, BATCH_MUTATE,
-    REPLAY_BATCH_MUTATE, COMPACT_REGION, REPLAY_EVENT
+    REPLAY_BATCH_MUTATE, COMPACT_REGION, REPLAY_EVENT, SNAPSHOT
   }
 
   /**
@@ -251,6 +251,13 @@ public interface Region extends ConfigurationObserver {
    */
   void closeRegionOperation() throws IOException;
 
+  /**
+   * Closes the region operation lock. This needs to be called in the finally block corresponding
+   * to the try block of {@link #startRegionOperation(Operation)}
+   * @throws IOException
+   */
+  void closeRegionOperation(Operation op) throws IOException;
+
   // Row write locks
 
   /**

http://git-wip-us.apache.org/repos/asf/hbase/blob/e894e875/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/snapshot/FlushSnapshotSubprocedure.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/snapshot/FlushSnapshotSubprocedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/snapshot/FlushSnapshotSubprocedure.java
index af2a496..b1179b5 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/snapshot/FlushSnapshotSubprocedure.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/snapshot/FlushSnapshotSubprocedure.java
@@ -34,6 +34,7 @@ import org.apache.hadoop.hbase.protobuf.generated.SnapshotProtos.SnapshotDescrip
 import org.apache.hadoop.hbase.regionserver.HRegion;
 import org.apache.hadoop.hbase.regionserver.Region;
 import org.apache.hadoop.hbase.regionserver.Region.FlushResult;
+import org.apache.hadoop.hbase.regionserver.Region.Operation;
 import org.apache.hadoop.hbase.regionserver.snapshot.RegionServerSnapshotManager.SnapshotSubprocedurePool;
 import org.apache.hadoop.hbase.snapshot.ClientSnapshotDescriptionUtils;
 
@@ -74,10 +75,18 @@ public class FlushSnapshotSubprocedure extends Subprocedure {
   /**
    * Callable for adding files to snapshot manifest working dir.  Ready for multithreading.
    */
-  private class RegionSnapshotTask implements Callable<Void> {
-    Region region;
-    RegionSnapshotTask(Region region) {
+  public static class RegionSnapshotTask implements Callable<Void> {
+    private Region region;
+    private boolean skipFlush;
+    private ForeignExceptionDispatcher monitor;
+    private SnapshotDescription snapshotDesc;
+
+    public RegionSnapshotTask(Region region, SnapshotDescription snapshotDesc,
+        boolean skipFlush, ForeignExceptionDispatcher monitor) {
       this.region = region;
+      this.skipFlush = skipFlush;
+      this.monitor = monitor;
+      this.snapshotDesc = snapshotDesc;
     }
 
     @Override
@@ -87,10 +96,10 @@ public class FlushSnapshotSubprocedure extends Subprocedure {
       // snapshots that involve multiple regions and regionservers.  It is still possible to have
       // an interleaving such that globally regions are missing, so we still need the verification
       // step.
-      LOG.debug("Starting region operation on " + region);
-      region.startRegionOperation();
+      LOG.debug("Starting snapshot operation on " + region);
+      region.startRegionOperation(Operation.SNAPSHOT);
       try {
-        if (snapshotSkipFlush) {
+        if (skipFlush) {
         /*
          * This is to take an online-snapshot without force a coordinated flush to prevent pause
          * The snapshot type is defined inside the snapshot description. FlushSnapshotSubprocedure
@@ -123,15 +132,15 @@ public class FlushSnapshotSubprocedure extends Subprocedure {
             throw new IOException("Unable to complete flush after " + MAX_RETRIES + " attempts");
           }
         }
-        ((HRegion)region).addRegionToSnapshot(snapshot, monitor);
-        if (snapshotSkipFlush) {
+        ((HRegion)region).addRegionToSnapshot(snapshotDesc, monitor);
+        if (skipFlush) {
           LOG.debug("... SkipFlush Snapshotting region " + region.toString() + " completed.");
         } else {
           LOG.debug("... Flush Snapshotting region " + region.toString() + " completed.");
         }
       } finally {
-        LOG.debug("Closing region operation on " + region);
-        region.closeRegionOperation();
+        LOG.debug("Closing snapshot operation on " + region);
+        region.closeRegionOperation(Operation.SNAPSHOT);
       }
       return null;
     }
@@ -155,7 +164,7 @@ public class FlushSnapshotSubprocedure extends Subprocedure {
     // Add all hfiles already existing in region.
     for (Region region : regions) {
       // submit one task per region for parallelize by region.
-      taskManager.submitTask(new RegionSnapshotTask(region));
+      taskManager.submitTask(new RegionSnapshotTask(region, snapshot, snapshotSkipFlush, monitor));
       monitor.rethrowException();
     }
 

http://git-wip-us.apache.org/repos/asf/hbase/blob/e894e875/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotManifest.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotManifest.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotManifest.java
index e4cac95..265ff5b 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotManifest.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotManifest.java
@@ -18,6 +18,7 @@
 
 package org.apache.hadoop.hbase.snapshot;
 
+import com.google.common.annotations.VisibleForTesting;
 import com.google.protobuf.CodedInputStream;
 import com.google.protobuf.InvalidProtocolBufferException;
 
@@ -110,6 +111,7 @@ public final class SnapshotManifest {
       final Path workingDir, final SnapshotDescription desc,
       final ForeignExceptionSnare monitor) {
     return new SnapshotManifest(conf, fs, workingDir, desc, monitor);
+
   }
 
   /**
@@ -163,9 +165,15 @@ public final class SnapshotManifest {
    * This is used by the "online snapshot" when the table is enabled.
    */
   public void addRegion(final HRegion region) throws IOException {
-    // 0. Get the ManifestBuilder/RegionVisitor
+    // Get the ManifestBuilder/RegionVisitor
     RegionVisitor visitor = createRegionVisitor(desc);
 
+    // Visit the region and add it to the manifest
+    addRegion(region, visitor);
+  }
+
+  @VisibleForTesting
+  protected void addRegion(final HRegion region, RegionVisitor visitor) throws IOException {
     // 1. dump region meta info into the snapshot directory
     LOG.debug("Storing '" + region + "' region-info for snapshot.");
     Object regionData = visitor.regionOpen(region.getRegionInfo());
@@ -203,12 +211,20 @@ public final class SnapshotManifest {
    * This is used by the "offline snapshot" when the table is disabled.
    */
   public void addRegion(final Path tableDir, final HRegionInfo regionInfo) throws IOException {
-    // 0. Get the ManifestBuilder/RegionVisitor
+    // Get the ManifestBuilder/RegionVisitor
     RegionVisitor visitor = createRegionVisitor(desc);
 
+    // Visit the region and add it to the manifest
+    addRegion(tableDir, regionInfo, visitor);
+  }
+
+  @VisibleForTesting
+  protected void addRegion(final Path tableDir, final HRegionInfo regionInfo, RegionVisitor visitor)
+      throws IOException {
+
     // Open the RegionFS
     HRegionFileSystem regionFs = HRegionFileSystem.openRegionFromFileSystem(conf, fs,
-          tableDir, regionInfo, true);
+        tableDir, regionInfo, true);
     monitor.rethrowException();
 
     // 1. dump region meta info into the snapshot directory

http://git-wip-us.apache.org/repos/asf/hbase/blob/e894e875/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestRegionSnapshotTask.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestRegionSnapshotTask.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestRegionSnapshotTask.java
new file mode 100644
index 0000000..403b1e6
--- /dev/null
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestRegionSnapshotTask.java
@@ -0,0 +1,205 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.snapshot;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.errorhandling.ForeignExceptionDispatcher;
+import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos;
+import org.apache.hadoop.hbase.protobuf.generated.SnapshotProtos;
+import org.apache.hadoop.hbase.regionserver.ConstantSizeRegionSplitPolicy;
+import org.apache.hadoop.hbase.regionserver.HRegion;
+import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
+import org.apache.hadoop.hbase.regionserver.snapshot.FlushSnapshotSubprocedure;
+import org.apache.hadoop.hbase.testclassification.MediumTests;
+import org.apache.hadoop.hbase.testclassification.RegionServerTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.FSUtils;
+
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.mockito.invocation.InvocationOnMock;
+import org.mockito.stubbing.Answer;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+import static org.mockito.Mockito.doAnswer;
+import static org.mockito.Mockito.spy;
+
+/**
+ * Testing the region snapshot task on a cluster.
+ * @see org.apache.hadoop.hbase.regionserver.snapshot.FlushSnapshotSubprocedure.RegionSnapshotTask
+ */
+@Category({ MediumTests.class, RegionServerTests.class})
+public class TestRegionSnapshotTask {
+  private final Log LOG = LogFactory.getLog(getClass());
+
+  private static HBaseTestingUtility TEST_UTIL;
+  private static Configuration conf;
+  private static FileSystem fs;
+  private static Path rootDir;
+
+  @BeforeClass
+  public static void setupBeforeClass() throws Exception {
+    TEST_UTIL = new HBaseTestingUtility();
+
+    conf = TEST_UTIL.getConfiguration();
+
+    // Try to frequently clean up compacted files
+    conf.setInt("hbase.hfile.compaction.discharger.interval", 1000);
+    conf.setInt("hbase.master.hfilecleaner.ttl", 1000);
+
+    TEST_UTIL.startMiniCluster(1);
+    TEST_UTIL.getHBaseCluster().waitForActiveAndReadyMaster();
+    TEST_UTIL.waitUntilAllRegionsAssigned(TableName.META_TABLE_NAME);
+
+    rootDir = FSUtils.getRootDir(conf);
+    fs = TEST_UTIL.getTestFileSystem();
+  }
+
+  @AfterClass
+  public static void tearDown() throws Exception {
+    TEST_UTIL.shutdownMiniCluster();
+  }
+
+  /**
+   * Tests adding a region to the snapshot manifest while compactions are running on the region.
+   * The idea is to slow down the process of adding a store file to the manifest while
+   * triggering compactions on the region, allowing the store files to be marked for archival while
+   * snapshot operation is running.
+   * This test checks for the correct behavior in such a case that the compacted files should
+   * not be moved around if a snapshot operation is in progress.
+   * See HBASE-18398
+   */
+  @Test(timeout = 30000)
+  public void testAddRegionWithCompactions() throws Exception {
+    final TableName tableName = TableName.valueOf("test_table");
+    Table table = setupTable(tableName);
+
+    List<HRegion> hRegions = TEST_UTIL.getHBaseCluster().getRegions(tableName);
+
+    final HBaseProtos.SnapshotDescription snapshot = HBaseProtos.SnapshotDescription.newBuilder()
+        .setTable(tableName.getNameAsString())
+        .setType(HBaseProtos.SnapshotDescription.Type.FLUSH)
+        .setName("test_table_snapshot")
+        .setVersion(SnapshotManifestV2.DESCRIPTOR_VERSION)
+        .build();
+    ForeignExceptionDispatcher monitor = new ForeignExceptionDispatcher(snapshot.getName());
+
+    final HRegion region = spy(hRegions.get(0));
+
+    Path workingDir = SnapshotDescriptionUtils.getWorkingSnapshotDir(snapshot, rootDir);
+    final SnapshotManifest manifest =
+        SnapshotManifest.create(conf, fs, workingDir, snapshot, monitor);
+    manifest.addTableDescriptor(table.getTableDescriptor());
+
+    if (!fs.exists(workingDir)) {
+      fs.mkdirs(workingDir);
+    }
+    assertTrue(fs.exists(workingDir));
+    SnapshotDescriptionUtils.writeSnapshotInfo(snapshot, workingDir, fs);
+
+    doAnswer(new Answer<Void>() {
+      @Override
+      public Void answer(InvocationOnMock invocationOnMock) throws Throwable {
+        addRegionToSnapshot(snapshot, region, manifest);
+        return null;
+      }
+    }).when(region).addRegionToSnapshot(snapshot, monitor);
+
+    FlushSnapshotSubprocedure.RegionSnapshotTask snapshotTask =
+        new FlushSnapshotSubprocedure.RegionSnapshotTask(region, snapshot, true, monitor);
+    ExecutorService executor = Executors.newFixedThreadPool(1);
+    Future f = executor.submit(snapshotTask);
+
+    // Trigger major compaction and wait for snaphot operation to finish
+    LOG.info("Starting major compaction");
+    region.compact(true);
+    LOG.info("Finished major compaction");
+    f.get();
+
+    // Consolidate region manifests into a single snapshot manifest
+    manifest.consolidate();
+
+    // Make sure that the region manifest exists, which means the snapshot operation succeeded
+    assertNotNull(manifest.getRegionManifests());
+    // Sanity check, there should be only one region
+    assertEquals(1, manifest.getRegionManifests().size());
+
+    // Make sure that no files went missing after the snapshot operation
+    SnapshotReferenceUtil.verifySnapshot(conf, fs, manifest);
+  }
+
+  private void addRegionToSnapshot(HBaseProtos.SnapshotDescription snapshot,
+      HRegion region, SnapshotManifest manifest) throws Exception {
+    LOG.info("Adding region to snapshot: " + region.getRegionInfo().getRegionNameAsString());
+    Path workingDir = SnapshotDescriptionUtils.getWorkingSnapshotDir(snapshot, rootDir);
+    SnapshotManifest.RegionVisitor visitor = createRegionVisitorWithDelay(snapshot, workingDir);
+    manifest.addRegion(region, visitor);
+    LOG.info("Added the region to snapshot: " + region.getRegionInfo().getRegionNameAsString());
+  }
+
+  private SnapshotManifest.RegionVisitor createRegionVisitorWithDelay(
+      HBaseProtos.SnapshotDescription desc, Path workingDir) {
+    return new SnapshotManifestV2.ManifestBuilder(conf, fs, workingDir) {
+      @Override
+      public void storeFile(final SnapshotProtos.SnapshotRegionManifest.Builder region,
+          final SnapshotProtos.SnapshotRegionManifest.FamilyFiles.Builder family,
+          final StoreFileInfo storeFile) throws IOException {
+        try {
+          LOG.debug("Introducing delay before adding store file to manifest");
+          Thread.sleep(2000);
+        } catch (InterruptedException ex) {
+          LOG.error("Interrupted due to error: " + ex);
+        }
+        super.storeFile(region, family, storeFile);
+      }
+    };
+  }
+
+  private Table setupTable(TableName tableName) throws Exception {
+    HTableDescriptor htd = new HTableDescriptor(tableName);
+    // Flush many files, but do not compact immediately
+    htd.setMemStoreFlushSize(5000).setConfiguration("hbase.hstore.compactionThreshold", "250");
+    // Make sure the region does not split
+    htd.setRegionSplitPolicyClassName(ConstantSizeRegionSplitPolicy.class.getName());
+    htd.setMaxFileSize(100 * 1024 * 1024);
+
+    byte[] fam = Bytes.toBytes("fam");
+    Table table = TEST_UTIL.createTable(htd, new byte[][] {fam},
+        TEST_UTIL.getConfiguration());
+    TEST_UTIL.loadTable(table, fam);
+    return table;
+  }
+}