Posted to commits@hbase.apache.org by ec...@apache.org on 2015/10/14 22:57:49 UTC

[1/4] hbase git commit: HBASE-14570 Split TestHBaseFsck in order to help with hanging tests

Repository: hbase
Updated Branches:
  refs/heads/master fe0bdbe48 -> fbd2ed2e0


http://git-wip-us.apache.org/repos/asf/hbase/blob/fbd2ed2e/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsckTwoRS.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsckTwoRS.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsckTwoRS.java
new file mode 100644
index 0000000..4eb1dd8
--- /dev/null
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsckTwoRS.java
@@ -0,0 +1,464 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.util;
+
+
+import com.google.common.collect.Multimap;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HColumnDescriptor;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.MetaTableAccessor;
+import org.apache.hadoop.hbase.MiniHBaseCluster;
+import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.ClusterConnection;
+import org.apache.hadoop.hbase.client.Durability;
+import org.apache.hadoop.hbase.client.HBaseAdmin;
+import org.apache.hadoop.hbase.client.HConnection;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.ResultScanner;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
+import org.apache.hadoop.hbase.master.AssignmentManager;
+import org.apache.hadoop.hbase.master.HMaster;
+import org.apache.hadoop.hbase.regionserver.HRegionServer;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.testclassification.MiscTests;
+import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
+import org.junit.AfterClass;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+import java.util.concurrent.ScheduledThreadPoolExecutor;
+import java.util.concurrent.SynchronousQueue;
+import java.util.concurrent.ThreadPoolExecutor;
+import java.util.concurrent.TimeUnit;
+
+import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.*;
+import static org.junit.Assert.*;
+
+@Category({MiscTests.class, LargeTests.class})
+public class TestHBaseFsckTwoRS extends BaseTestHBaseFsck {
+
+  @BeforeClass
+  public static void setUpBeforeClass() throws Exception {
+    TEST_UTIL.getConfiguration().set(CoprocessorHost.MASTER_COPROCESSOR_CONF_KEY,
+        MasterSyncObserver.class.getName());
+
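+    // Tune region server handler counts and client thread pools, and shorten the hbck
+    // close and RPC timeouts, so problems on the mini cluster surface quickly rather
+    // than hanging the test.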
+    conf.setInt("hbase.regionserver.handler.count", 2);
+    conf.setInt("hbase.regionserver.metahandler.count", 30);
+
+    conf.setInt("hbase.htable.threads.max", POOL_SIZE);
+    conf.setInt("hbase.hconnection.threads.max", 2 * POOL_SIZE);
+    conf.setInt("hbase.hconnection.threads.core", POOL_SIZE);
+    conf.setInt("hbase.hbck.close.timeout", 2 * REGION_ONLINE_TIMEOUT);
+    conf.setInt(HConstants.HBASE_RPC_TIMEOUT_KEY, 8 * REGION_ONLINE_TIMEOUT);
+    TEST_UTIL.startMiniCluster(2);
+
+    tableExecutorService = new ThreadPoolExecutor(1, POOL_SIZE, 60, TimeUnit.SECONDS,
+        new SynchronousQueue<Runnable>(), Threads.newDaemonThreadFactory("testhbck"));
+
+    hbfsckExecutorService = new ScheduledThreadPoolExecutor(POOL_SIZE);
+
+    AssignmentManager assignmentManager =
+        TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager();
+    regionStates = assignmentManager.getRegionStates();
+
+    connection = (ClusterConnection) TEST_UTIL.getConnection();
+
+    admin = connection.getAdmin();
+    admin.setBalancerRunning(false, true);
+
+    TEST_UTIL.waitUntilAllRegionsAssigned(TableName.META_TABLE_NAME);
+    TEST_UTIL.waitUntilAllRegionsAssigned(TableName.NAMESPACE_TABLE_NAME);
+  }
+
+  @AfterClass
+  public static void tearDownAfterClass() throws Exception {
+    tableExecutorService.shutdown();
+    hbfsckExecutorService.shutdown();
+    admin.close();
+    TEST_UTIL.shutdownMiniCluster();
+  }
+
+  @Before
+  public void setUp() {
+    EnvironmentEdgeManager.reset();
+  }
+
+  @Test(timeout=180000)
+  public void testFixAssignmentsWhenMETAinTransition() throws Exception {
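+    // Simulate hbase:meta being stuck in transition: close the meta region, mark it
+    // offline in the master's region states, and delete its location from ZooKeeper,
+    // then let a fixing hbck run bring it back online.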
+    MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
+    admin.closeRegion(cluster.getServerHoldingMeta(), HRegionInfo.FIRST_META_REGIONINFO);
+    regionStates.regionOffline(HRegionInfo.FIRST_META_REGIONINFO);
+    new MetaTableLocator().deleteMetaLocation(cluster.getMaster().getZooKeeper());
+    assertFalse(regionStates.isRegionOnline(HRegionInfo.FIRST_META_REGIONINFO));
+    HBaseFsck hbck = doFsck(conf, true);
+    assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.UNKNOWN, HBaseFsck.ErrorReporter.ERROR_CODE.NO_META_REGION,
+        HBaseFsck.ErrorReporter.ERROR_CODE.NULL_META_REGION });
+    assertNoErrors(doFsck(conf, false));
+  }
+
+  /**
+   * This creates and fixes a bad table with regions that have a duplicate
+   * start key
+   */
+  @Test (timeout=180000)
+  public void testDupeStartKey() throws Exception {
+    TableName table =
+        TableName.valueOf("tableDupeStartKey");
+    try {
+      setupTable(table);
+      assertNoErrors(doFsck(conf, false));
+      assertEquals(ROWKEYS.length, countRows());
+
+      // Now let's mess it up, by adding a region with a duplicate startkey
+      HRegionInfo hriDupe =
+          createRegion(tbl.getTableDescriptor(), Bytes.toBytes("A"), Bytes.toBytes("A2"));
+      TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriDupe);
+      TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
+          .waitForAssignment(hriDupe);
+      ServerName server = regionStates.getRegionServerOfRegion(hriDupe);
+      TEST_UTIL.assertRegionOnServer(hriDupe, server, REGION_ONLINE_TIMEOUT);
+
+      HBaseFsck hbck = doFsck(conf, false);
+      assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.DUPE_STARTKEYS, HBaseFsck.ErrorReporter.ERROR_CODE.DUPE_STARTKEYS });
+      assertEquals(2, hbck.getOverlapGroups(table).size());
+      assertEquals(ROWKEYS.length, countRows()); // seems like the "bigger" region won.
+
+      // fix the degenerate region.
+      doFsck(conf, true);
+
+      // check that the degenerate region is gone and no data loss
+      HBaseFsck hbck2 = doFsck(conf,false);
+      assertNoErrors(hbck2);
+      assertEquals(0, hbck2.getOverlapGroups(table).size());
+      assertEquals(ROWKEYS.length, countRows());
+    } finally {
+      cleanupTable(table);
+    }
+  }
+
+  /**
+   * This creates and fixes a bad table with regions that have a duplicate
+   * start key
+   */
+  @Test (timeout=180000)
+  public void testDupeRegion() throws Exception {
+    TableName table =
+        TableName.valueOf("tableDupeRegion");
+    try {
+      setupTable(table);
+      assertNoErrors(doFsck(conf, false));
+      assertEquals(ROWKEYS.length, countRows());
+
+      // Now let's mess it up, by adding a region with a duplicate startkey
+      HRegionInfo hriDupe =
+          createRegion(tbl.getTableDescriptor(), Bytes.toBytes("A"), Bytes.toBytes("B"));
+
+      TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriDupe);
+      TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
+          .waitForAssignment(hriDupe);
+      ServerName server = regionStates.getRegionServerOfRegion(hriDupe);
+      TEST_UTIL.assertRegionOnServer(hriDupe, server, REGION_ONLINE_TIMEOUT);
+
+      // Yikes! The assignment manager can't tell the difference between two
+      // different regions with the same start/endkeys since it doesn't
+      // differentiate on ts/regionId!  We actually need to recheck
+      // deployments!
+      while (findDeployedHSI(getDeployedHRIs((HBaseAdmin) admin), hriDupe) == null) {
+        Thread.sleep(250);
+      }
+
+      LOG.debug("Finished assignment of dupe region");
+
+      // TODO why is dupe region different from dupe start keys?
+      HBaseFsck hbck = doFsck(conf, false);
+      assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.DUPE_STARTKEYS, HBaseFsck.ErrorReporter.ERROR_CODE.DUPE_STARTKEYS });
+      assertEquals(2, hbck.getOverlapGroups(table).size());
+      assertEquals(ROWKEYS.length, countRows()); // seems like the "bigger" region won.
+
+      // fix the degenerate region.
+      doFsck(conf, true);
+
+      // check that the degenerate region is gone and no data loss
+      HBaseFsck hbck2 = doFsck(conf,false);
+      assertNoErrors(hbck2);
+      assertEquals(0, hbck2.getOverlapGroups(table).size());
+      assertEquals(ROWKEYS.length, countRows());
+    } finally {
+      cleanupTable(table);
+    }
+  }
+
+
+  /**
+   * This creates and fixes a bad table where a region is completely contained
+   * by another region.
+   */
+  @Test (timeout=180000)
+  public void testContainedRegionOverlap() throws Exception {
+    TableName table =
+        TableName.valueOf("tableContainedRegionOverlap");
+    try {
+      setupTable(table);
+      assertEquals(ROWKEYS.length, countRows());
+
+      // Mess it up by creating an overlap in the metadata
+      HRegionInfo hriOverlap =
+          createRegion(tbl.getTableDescriptor(), Bytes.toBytes("A2"), Bytes.toBytes("B"));
+      TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriOverlap);
+      TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
+          .waitForAssignment(hriOverlap);
+      ServerName server = regionStates.getRegionServerOfRegion(hriOverlap);
+      TEST_UTIL.assertRegionOnServer(hriOverlap, server, REGION_ONLINE_TIMEOUT);
+
+      HBaseFsck hbck = doFsck(conf, false);
+      assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.OVERLAP_IN_REGION_CHAIN });
+      assertEquals(2, hbck.getOverlapGroups(table).size());
+      assertEquals(ROWKEYS.length, countRows());
+
+      // fix the problem.
+      doFsck(conf, true);
+
+      // verify that overlaps are fixed
+      HBaseFsck hbck2 = doFsck(conf,false);
+      assertNoErrors(hbck2);
+      assertEquals(0, hbck2.getOverlapGroups(table).size());
+      assertEquals(ROWKEYS.length, countRows());
+    } finally {
+      cleanupTable(table);
+    }
+  }
+
+  /**
+   * Test fixing lingering reference file.
+   */
+  @Test (timeout=180000)
+  public void testLingeringReferenceFile() throws Exception {
+    TableName table =
+        TableName.valueOf("testLingeringReferenceFile");
+    try {
+      setupTable(table);
+      assertEquals(ROWKEYS.length, countRows());
+
+      // Mess it up by creating a fake reference file
+      FileSystem fs = FileSystem.get(conf);
+      Path tableDir= FSUtils.getTableDir(FSUtils.getRootDir(conf), table);
+      Path regionDir = FSUtils.getRegionDirs(fs, tableDir).get(0);
+      Path famDir = new Path(regionDir, FAM_STR);
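+      // A file named <hfile>.<parent region> in a family directory looks like a split
+      // reference; since the parent it points to does not exist, hbck should flag it
+      // as a lingering reference.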
+      Path fakeReferenceFile = new Path(famDir, "fbce357483ceea.12144538");
+      fs.create(fakeReferenceFile);
+
+      HBaseFsck hbck = doFsck(conf, false);
+      assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.LINGERING_REFERENCE_HFILE });
+      // fix reference file
+      doFsck(conf, true);
+      // check that reference file fixed
+      assertNoErrors(doFsck(conf, false));
+    } finally {
+      cleanupTable(table);
+    }
+  }
+
+  @Test (timeout=180000)
+  public void testMetaOffline() throws Exception {
+    // check no errors
+    HBaseFsck hbck = doFsck(conf, false);
+    assertNoErrors(hbck);
+    deleteMetaRegion(conf, true, false, false);
+    hbck = doFsck(conf, false);
+    // ERROR_CODE.UNKNOWN shows up because reportError is called with a message describing
+    // the hbase:meta inconsistency and whether we will be fixing it or not.
+    assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.NO_META_REGION, HBaseFsck.ErrorReporter.ERROR_CODE.UNKNOWN });
+    hbck = doFsck(conf, true);
+    assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.NO_META_REGION, HBaseFsck.ErrorReporter.ERROR_CODE.UNKNOWN });
+    hbck = doFsck(conf, false);
+    assertNoErrors(hbck);
+  }
+
+  /**
+   * This creates and fixes a bad table where an overlap group of
+   * 3 regions exists. HBaseFsck.maxMerge is set to 2 to trigger sidelining of
+   * an overlapped region. The meta data is then messed up so that
+   * closeRegion/offlineRegion throw exceptions.
+   */
+  @Test (timeout=180000)
+  public void testSidelineOverlapRegion() throws Exception {
+    TableName table =
+        TableName.valueOf("testSidelineOverlapRegion");
+    try {
+      setupTable(table);
+      assertEquals(ROWKEYS.length, countRows());
+
+      // Mess it up by creating an overlap
+      MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
+      HMaster master = cluster.getMaster();
+      HRegionInfo hriOverlap1 =
+          createRegion(tbl.getTableDescriptor(), Bytes.toBytes("A"), Bytes.toBytes("AB"));
+      master.assignRegion(hriOverlap1);
+      master.getAssignmentManager().waitForAssignment(hriOverlap1);
+      HRegionInfo hriOverlap2 =
+          createRegion(tbl.getTableDescriptor(), Bytes.toBytes("AB"), Bytes.toBytes("B"));
+      master.assignRegion(hriOverlap2);
+      master.getAssignmentManager().waitForAssignment(hriOverlap2);
+
+      HBaseFsck hbck = doFsck(conf, false);
+      assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {HBaseFsck.ErrorReporter.ERROR_CODE.DUPE_STARTKEYS,
+          HBaseFsck.ErrorReporter.ERROR_CODE.DUPE_STARTKEYS, HBaseFsck.ErrorReporter.ERROR_CODE.OVERLAP_IN_REGION_CHAIN});
+      assertEquals(3, hbck.getOverlapGroups(table).size());
+      assertEquals(ROWKEYS.length, countRows());
+
+      // mess around the overlapped regions, to trigger NotServingRegionException
+      Multimap<byte[], HBaseFsck.HbckInfo> overlapGroups = hbck.getOverlapGroups(table);
+      ServerName serverName = null;
+      byte[] regionName = null;
+      for (HBaseFsck.HbckInfo hbi: overlapGroups.values()) {
+        if ("A".equals(Bytes.toString(hbi.getStartKey()))
+            && "B".equals(Bytes.toString(hbi.getEndKey()))) {
+          regionName = hbi.getRegionName();
+
+          // get an RS not serving the region to force bad assignment info into META.
+          int k = cluster.getServerWith(regionName);
+          for (int i = 0; i < 3; i++) {
+            if (i != k) {
+              HRegionServer rs = cluster.getRegionServer(i);
+              serverName = rs.getServerName();
+              break;
+            }
+          }
+
+          HBaseFsckRepair.closeRegionSilentlyAndWait((HConnection) connection,
+              cluster.getRegionServer(k).getServerName(), hbi.getHdfsHRI());
+          admin.offline(regionName);
+          break;
+        }
+      }
+
+      assertNotNull(regionName);
+      assertNotNull(serverName);
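+      // Point the region's server column in hbase:meta at an RS that is not actually
+      // serving it, so that the close/offline attempts during the repair below hit the
+      // wrong server and throw.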
+      try (Table meta = connection.getTable(TableName.META_TABLE_NAME, tableExecutorService)) {
+        Put put = new Put(regionName);
+        put.add(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER,
+            Bytes.toBytes(serverName.getHostAndPort()));
+        meta.put(put);
+      }
+
+      // fix the problem.
+      HBaseFsck fsck = new HBaseFsck(conf, hbfsckExecutorService);
+      fsck.connect();
+      HBaseFsck.setDisplayFullReport(); // i.e. -details
+      fsck.setTimeLag(0);
+      fsck.setFixAssignments(true);
+      fsck.setFixMeta(true);
+      fsck.setFixHdfsHoles(true);
+      fsck.setFixHdfsOverlaps(true);
+      fsck.setFixHdfsOrphans(true);
+      fsck.setFixVersionFile(true);
+      fsck.setSidelineBigOverlaps(true);
+      fsck.setMaxMerge(2);
+      fsck.onlineHbck();
+      fsck.close();
+
+      // verify that overlaps are fixed, and there are fewer rows
+      // since one region is sidelined.
+      HBaseFsck hbck2 = doFsck(conf,false);
+      assertNoErrors(hbck2);
+      assertEquals(0, hbck2.getOverlapGroups(table).size());
+      assertTrue(ROWKEYS.length > countRows());
+    } finally {
+      cleanupTable(table);
+    }
+  }
+
+  @Test(timeout=180000)
+  public void testHBaseFsck() throws Exception {
+    assertNoErrors(doFsck(conf, false));
+    TableName table = TableName.valueOf("tableBadMetaAssign");
+    HTableDescriptor desc = new HTableDescriptor(table);
+    HColumnDescriptor hcd = new HColumnDescriptor(Bytes.toString(FAM));
+    desc.addFamily(hcd); // If a table has no CF's it doesn't get checked
+    createTable(TEST_UTIL, desc, null);
+
+    // We created 1 table, should be fine
+    assertNoErrors(doFsck(conf, false));
+
+    // Now let's mess it up and change the assignment in hbase:meta to
+    // point to a different region server
+    Table meta = connection.getTable(TableName.META_TABLE_NAME, tableExecutorService);
+    Scan scan = new Scan();
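+    // hbase:meta row keys have the form <table>,<start key>,<region id>, so starting
+    // the scan at "<table>,," positions it at this table's first region row.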
+    scan.setStartRow(Bytes.toBytes(table+",,"));
+    ResultScanner scanner = meta.getScanner(scan);
+    HRegionInfo hri = null;
+
+    Result res = scanner.next();
+    ServerName currServer =
+        ServerName.parseFrom(res.getValue(HConstants.CATALOG_FAMILY,
+            HConstants.SERVER_QUALIFIER));
+    long startCode = Bytes.toLong(res.getValue(HConstants.CATALOG_FAMILY,
+        HConstants.STARTCODE_QUALIFIER));
+
+    for (JVMClusterUtil.RegionServerThread rs :
+        TEST_UTIL.getHBaseCluster().getRegionServerThreads()) {
+
+      ServerName sn = rs.getRegionServer().getServerName();
+
+      // When we find a diff RS, change the assignment and break
+      if (!currServer.getHostAndPort().equals(sn.getHostAndPort()) ||
+          startCode != sn.getStartcode()) {
+        Put put = new Put(res.getRow());
+        put.setDurability(Durability.SKIP_WAL);
+        put.add(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER,
+            Bytes.toBytes(sn.getHostAndPort()));
+        put.add(HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER,
+            Bytes.toBytes(sn.getStartcode()));
+        meta.put(put);
+        hri = MetaTableAccessor.getHRegionInfo(res);
+        break;
+      }
+    }
+
+    // Try to fix the data
+    assertErrors(doFsck(conf, true), new HBaseFsck.ErrorReporter.ERROR_CODE[]{
+        HBaseFsck.ErrorReporter.ERROR_CODE.SERVER_DOES_NOT_MATCH_META});
+
+    TEST_UTIL.getHBaseCluster().getMaster()
+        .getAssignmentManager().waitForAssignment(hri);
+
+    // Should be fixed now
+    assertNoErrors(doFsck(conf, false));
+
+    // Open and immediately close a scanner against the table; this appears to be a
+    // sanity check that the table is readable by clients again after the fix.
+    Table t = connection.getTable(table, tableExecutorService);
+    ResultScanner s = t.getScanner(new Scan());
+    s.close();
+    t.close();
+
+    scanner.close();
+    meta.close();
+  }
+}


[3/4] hbase git commit: HBASE-14570 Split TestHBaseFsck in order to help with hanging tests

Posted by ec...@apache.org.
http://git-wip-us.apache.org/repos/asf/hbase/blob/fbd2ed2e/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java
deleted file mode 100644
index 3562a69..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java
+++ /dev/null
@@ -1,2914 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.util;
-
-import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.assertErrors;
-import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.assertNoErrors;
-import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.doFsck;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertNotEquals;
-import static org.junit.Assert.assertNotNull;
-import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.fail;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.LinkedList;
-import java.util.List;
-import java.util.Map;
-import java.util.NavigableMap;
-import java.util.Set;
-import java.util.UUID;
-import java.util.concurrent.Callable;
-import java.util.concurrent.CountDownLatch;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
-import java.util.concurrent.Future;
-import java.util.concurrent.ScheduledThreadPoolExecutor;
-import java.util.concurrent.SynchronousQueue;
-import java.util.concurrent.ThreadPoolExecutor;
-import java.util.concurrent.TimeUnit;
-import java.util.concurrent.atomic.AtomicBoolean;
-
-import org.apache.commons.io.IOUtils;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.ClusterStatus;
-import org.apache.hadoop.hbase.HBaseTestingUtility;
-import org.apache.hadoop.hbase.HColumnDescriptor;
-import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.HRegionInfo;
-import org.apache.hadoop.hbase.HRegionLocation;
-import org.apache.hadoop.hbase.HTableDescriptor;
-import org.apache.hadoop.hbase.MiniHBaseCluster;
-import org.apache.hadoop.hbase.ServerName;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.MetaTableAccessor;
-import org.apache.hadoop.hbase.client.Admin;
-import org.apache.hadoop.hbase.client.ClusterConnection;
-import org.apache.hadoop.hbase.client.Connection;
-import org.apache.hadoop.hbase.client.ConnectionFactory;
-import org.apache.hadoop.hbase.client.Delete;
-import org.apache.hadoop.hbase.client.Durability;
-import org.apache.hadoop.hbase.client.Get;
-import org.apache.hadoop.hbase.client.HBaseAdmin;
-import org.apache.hadoop.hbase.client.HConnection;
-import org.apache.hadoop.hbase.client.HTable;
-import org.apache.hadoop.hbase.client.Put;
-import org.apache.hadoop.hbase.client.RegionLocator;
-import org.apache.hadoop.hbase.client.RegionReplicaUtil;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.ResultScanner;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.client.Table;
-import org.apache.hadoop.hbase.coprocessor.BaseMasterObserver;
-import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
-import org.apache.hadoop.hbase.coprocessor.MasterCoprocessorEnvironment;
-import org.apache.hadoop.hbase.coprocessor.ObserverContext;
-import org.apache.hadoop.hbase.io.hfile.TestHFile;
-import org.apache.hadoop.hbase.master.AssignmentManager;
-import org.apache.hadoop.hbase.master.HMaster;
-import org.apache.hadoop.hbase.master.RegionState;
-import org.apache.hadoop.hbase.master.RegionStates;
-import org.apache.hadoop.hbase.master.TableLockManager;
-import org.apache.hadoop.hbase.master.TableLockManager.TableLock;
-import org.apache.hadoop.hbase.mob.MobFileName;
-import org.apache.hadoop.hbase.mob.MobUtils;
-import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
-import org.apache.hadoop.hbase.protobuf.generated.AdminProtos;
-import org.apache.hadoop.hbase.regionserver.HRegion;
-import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
-import org.apache.hadoop.hbase.regionserver.HRegionServer;
-import org.apache.hadoop.hbase.regionserver.SplitTransactionFactory;
-import org.apache.hadoop.hbase.regionserver.SplitTransactionImpl;
-import org.apache.hadoop.hbase.regionserver.TestEndToEndSplitTransaction;
-import org.apache.hadoop.hbase.testclassification.LargeTests;
-import org.apache.hadoop.hbase.testclassification.MiscTests;
-import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter;
-import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter.ERROR_CODE;
-import org.apache.hadoop.hbase.util.HBaseFsck.HbckInfo;
-import org.apache.hadoop.hbase.util.HBaseFsck.PrintingErrorReporter;
-import org.apache.hadoop.hbase.util.HBaseFsck.TableInfo;
-import org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker;
-import org.apache.hadoop.hbase.util.hbck.HbckTestingUtil;
-import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
-import org.apache.zookeeper.KeeperException;
-import org.junit.AfterClass;
-import org.junit.Assert;
-import org.junit.Before;
-import org.junit.BeforeClass;
-import org.junit.Ignore;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-import org.junit.rules.TestName;
-
-import com.google.common.collect.Multimap;
-
-/**
- * This tests HBaseFsck's ability to detect reasons for inconsistent tables.
- */
-@Category({MiscTests.class, LargeTests.class})
-public class TestHBaseFsck {
-  static final int POOL_SIZE = 7;
-  private static final Log LOG = LogFactory.getLog(TestHBaseFsck.class);
-  private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
-  private final static Configuration conf = TEST_UTIL.getConfiguration();
-  private final static String FAM_STR = "fam";
-  private final static byte[] FAM = Bytes.toBytes(FAM_STR);
-  private final static int REGION_ONLINE_TIMEOUT = 800;
-  private static RegionStates regionStates;
-  private static ExecutorService tableExecutorService;
-  private static ScheduledThreadPoolExecutor hbfsckExecutorService;
-  private static ClusterConnection connection;
-  private static Admin admin;
-
-  // for the instance, reset every test run
-  private Table tbl;
-  private final static byte[][] SPLITS = new byte[][] { Bytes.toBytes("A"),
-    Bytes.toBytes("B"), Bytes.toBytes("C") };
-  // one row per region.
-  private final static byte[][] ROWKEYS= new byte[][] {
-    Bytes.toBytes("00"), Bytes.toBytes("50"), Bytes.toBytes("A0"), Bytes.toBytes("A5"),
-    Bytes.toBytes("B0"), Bytes.toBytes("B5"), Bytes.toBytes("C0"), Bytes.toBytes("C5") };
-
-  @BeforeClass
-  public static void setUpBeforeClass() throws Exception {
-    TEST_UTIL.getConfiguration().set(CoprocessorHost.MASTER_COPROCESSOR_CONF_KEY,
-        MasterSyncObserver.class.getName());
-
-    conf.setInt("hbase.regionserver.handler.count", 2);
-    conf.setInt("hbase.regionserver.metahandler.count", 30);
-
-    conf.setInt("hbase.htable.threads.max", POOL_SIZE);
-    conf.setInt("hbase.hconnection.threads.max", 2 * POOL_SIZE);
-    conf.setInt("hbase.hconnection.threads.core", POOL_SIZE);
-    conf.setInt("hbase.hbck.close.timeout", 2 * REGION_ONLINE_TIMEOUT);
-    conf.setInt(HConstants.HBASE_RPC_TIMEOUT_KEY, 8 * REGION_ONLINE_TIMEOUT);
-    TEST_UTIL.startMiniCluster(3);
-
-    tableExecutorService = new ThreadPoolExecutor(1, POOL_SIZE, 60, TimeUnit.SECONDS,
-        new SynchronousQueue<Runnable>(), Threads.newDaemonThreadFactory("testhbck"));
-
-    hbfsckExecutorService = new ScheduledThreadPoolExecutor(POOL_SIZE);
-
-    AssignmentManager assignmentManager =
-      TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager();
-    regionStates = assignmentManager.getRegionStates();
-
-    connection = (ClusterConnection) TEST_UTIL.getConnection();
-
-    admin = connection.getAdmin();
-    admin.setBalancerRunning(false, true);
-
-    TEST_UTIL.waitUntilAllRegionsAssigned(TableName.META_TABLE_NAME);
-    TEST_UTIL.waitUntilAllRegionsAssigned(TableName.NAMESPACE_TABLE_NAME);
-  }
-
-  @AfterClass
-  public static void tearDownAfterClass() throws Exception {
-    tableExecutorService.shutdown();
-    hbfsckExecutorService.shutdown();
-    admin.close();
-    TEST_UTIL.shutdownMiniCluster();
-  }
-
-  @Before
-  public void setUp() {
-    EnvironmentEdgeManager.reset();
-  }
-
-  @Test (timeout=180000)
-  public void testHBaseFsck() throws Exception {
-    assertNoErrors(doFsck(conf, false));
-    TableName table = TableName.valueOf("tableBadMetaAssign");
-    HTableDescriptor desc = new HTableDescriptor(table);
-    HColumnDescriptor hcd = new HColumnDescriptor(Bytes.toString(FAM));
-    desc.addFamily(hcd); // If a table has no CF's it doesn't get checked
-    createTable(TEST_UTIL, desc, null);
-
-    // We created 1 table, should be fine
-    assertNoErrors(doFsck(conf, false));
-
-    // Now let's mess it up and change the assignment in hbase:meta to
-    // point to a different region server
-    Table meta = connection.getTable(TableName.META_TABLE_NAME, tableExecutorService);
-    Scan scan = new Scan();
-    scan.setStartRow(Bytes.toBytes(table+",,"));
-    ResultScanner scanner = meta.getScanner(scan);
-    HRegionInfo hri = null;
-
-    Result res = scanner.next();
-    ServerName currServer =
-      ServerName.parseFrom(res.getValue(HConstants.CATALOG_FAMILY,
-          HConstants.SERVER_QUALIFIER));
-    long startCode = Bytes.toLong(res.getValue(HConstants.CATALOG_FAMILY,
-        HConstants.STARTCODE_QUALIFIER));
-
-    for (JVMClusterUtil.RegionServerThread rs :
-        TEST_UTIL.getHBaseCluster().getRegionServerThreads()) {
-
-      ServerName sn = rs.getRegionServer().getServerName();
-
-      // When we find a diff RS, change the assignment and break
-      if (!currServer.getHostAndPort().equals(sn.getHostAndPort()) ||
-          startCode != sn.getStartcode()) {
-        Put put = new Put(res.getRow());
-        put.setDurability(Durability.SKIP_WAL);
-        put.add(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER,
-          Bytes.toBytes(sn.getHostAndPort()));
-        put.add(HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER,
-          Bytes.toBytes(sn.getStartcode()));
-        meta.put(put);
-        hri = MetaTableAccessor.getHRegionInfo(res);
-        break;
-      }
-    }
-
-    // Try to fix the data
-    assertErrors(doFsck(conf, true), new ERROR_CODE[]{
-        ERROR_CODE.SERVER_DOES_NOT_MATCH_META});
-
-    TEST_UTIL.getHBaseCluster().getMaster()
-      .getAssignmentManager().waitForAssignment(hri);
-
-    // Should be fixed now
-    assertNoErrors(doFsck(conf, false));
-
-    // comment needed - what is the purpose of this line
-    Table t = connection.getTable(table, tableExecutorService);
-    ResultScanner s = t.getScanner(new Scan());
-    s.close();
-    t.close();
-
-    scanner.close();
-    meta.close();
-  }
-
-  @Test(timeout=180000)
-  public void testFixAssignmentsWhenMETAinTransition() throws Exception {
-    MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
-    admin.closeRegion(cluster.getServerHoldingMeta(), HRegionInfo.FIRST_META_REGIONINFO);
-    regionStates.regionOffline(HRegionInfo.FIRST_META_REGIONINFO);
-    new MetaTableLocator().deleteMetaLocation(cluster.getMaster().getZooKeeper());
-    assertFalse(regionStates.isRegionOnline(HRegionInfo.FIRST_META_REGIONINFO));
-    HBaseFsck hbck = doFsck(conf, true);
-    assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.UNKNOWN, ERROR_CODE.NO_META_REGION,
-        ERROR_CODE.NULL_META_REGION });
-    assertNoErrors(doFsck(conf, false));
-  }
-
-  /**
-   * Create a new region in META.
-   */
-  private HRegionInfo createRegion(final HTableDescriptor
-      htd, byte[] startKey, byte[] endKey)
-      throws IOException {
-    Table meta = connection.getTable(TableName.META_TABLE_NAME, tableExecutorService);
-    HRegionInfo hri = new HRegionInfo(htd.getTableName(), startKey, endKey);
-    MetaTableAccessor.addRegionToMeta(meta, hri);
-    meta.close();
-    return hri;
-  }
-
-  /**
-   * Debugging method to dump the contents of meta.
-   */
-  private void dumpMeta(TableName tableName) throws IOException {
-    List<byte[]> metaRows = TEST_UTIL.getMetaTableRows(tableName);
-    for (byte[] row : metaRows) {
-      LOG.info(Bytes.toString(row));
-    }
-  }
-
-  /**
-   * This method is used to undeploy a region -- close it and attempt to
-   * remove its state from the Master.
-   */
-  private void undeployRegion(Connection conn, ServerName sn,
-      HRegionInfo hri) throws IOException, InterruptedException {
-    try {
-      HBaseFsckRepair.closeRegionSilentlyAndWait((HConnection) conn, sn, hri);
-      if (!hri.isMetaTable()) {
-        admin.offline(hri.getRegionName());
-      }
-    } catch (IOException ioe) {
-      LOG.warn("Got exception when attempting to offline region "
-          + Bytes.toString(hri.getRegionName()), ioe);
-    }
-  }
-  /**
-   * Delete a region from assignments, meta, or completely from hdfs.
-   * @param unassign if true unassign region if assigned
-   * @param metaRow  if true remove region's row from META
-   * @param hdfs if true remove region's dir in HDFS
-   */
-  private void deleteRegion(Configuration conf, final HTableDescriptor htd,
-      byte[] startKey, byte[] endKey, boolean unassign, boolean metaRow,
-      boolean hdfs) throws IOException, InterruptedException {
-    deleteRegion(conf, htd, startKey, endKey, unassign, metaRow, hdfs, false,
-        HRegionInfo.DEFAULT_REPLICA_ID);
-  }
-
-  /**
-   * Delete a region from assignments, meta, or completely from hdfs.
-   * @param unassign if true unassign region if assigned
-   * @param metaRow  if true remove region's row from META
-   * @param hdfs if true remove region's dir in HDFS
-   * @param regionInfoOnly if true remove a region dir's .regioninfo file
-   * @param replicaId replica id
-   */
-  private void deleteRegion(Configuration conf, final HTableDescriptor htd,
-      byte[] startKey, byte[] endKey, boolean unassign, boolean metaRow,
-      boolean hdfs, boolean regionInfoOnly, int replicaId)
-          throws IOException, InterruptedException {
-    LOG.info("** Before delete:");
-    dumpMeta(htd.getTableName());
-
-    List<HRegionLocation> locations;
-    try(RegionLocator rl = connection.getRegionLocator(tbl.getName())) {
-      locations = rl.getAllRegionLocations();
-    }
-
-    for (HRegionLocation location : locations) {
-      HRegionInfo hri = location.getRegionInfo();
-      ServerName hsa = location.getServerName();
-      if (Bytes.compareTo(hri.getStartKey(), startKey) == 0
-          && Bytes.compareTo(hri.getEndKey(), endKey) == 0
-          && hri.getReplicaId() == replicaId) {
-
-        LOG.info("RegionName: " +hri.getRegionNameAsString());
-        byte[] deleteRow = hri.getRegionName();
-
-        if (unassign) {
-          LOG.info("Undeploying region " + hri + " from server " + hsa);
-          undeployRegion(connection, hsa, hri);
-        }
-
-        if (regionInfoOnly) {
-          LOG.info("deleting hdfs .regioninfo data: " + hri.toString() + hsa.toString());
-          Path rootDir = FSUtils.getRootDir(conf);
-          FileSystem fs = rootDir.getFileSystem(conf);
-          Path p = new Path(FSUtils.getTableDir(rootDir, htd.getTableName()),
-              hri.getEncodedName());
-          Path hriPath = new Path(p, HRegionFileSystem.REGION_INFO_FILE);
-          fs.delete(hriPath, true);
-        }
-
-        if (hdfs) {
-          LOG.info("deleting hdfs data: " + hri.toString() + hsa.toString());
-          Path rootDir = FSUtils.getRootDir(conf);
-          FileSystem fs = rootDir.getFileSystem(conf);
-          Path p = new Path(FSUtils.getTableDir(rootDir, htd.getTableName()),
-              hri.getEncodedName());
-          HBaseFsck.debugLsr(conf, p);
-          boolean success = fs.delete(p, true);
-          LOG.info("Deleted " + p + " sucessfully? " + success);
-          HBaseFsck.debugLsr(conf, p);
-        }
-
-        if (metaRow) {
-          try (Table meta = connection.getTable(TableName.META_TABLE_NAME, tableExecutorService)) {
-            Delete delete = new Delete(deleteRow);
-            meta.delete(delete);
-          }
-        }
-      }
-      LOG.info(hri.toString() + hsa.toString());
-    }
-
-    TEST_UTIL.getMetaTableRows(htd.getTableName());
-    LOG.info("*** After delete:");
-    dumpMeta(htd.getTableName());
-  }
-
-  /**
-   * Sets up a clean table before we start mucking with it.
-   *
-   * It sets tbl, which needs to be closed after the test.
-   *
-   * @throws IOException
-   * @throws InterruptedException
-   * @throws KeeperException
-   */
-  void setupTable(TableName tablename) throws Exception {
-    setupTableWithRegionReplica(tablename, 1);
-  }
-
-  /**
-   * Sets up a clean table with a certain region_replica count.
-   *
-   * It sets tbl, which needs to be closed after the test.
-   *
-   * @param tablename
-   * @param replicaCount
-   * @throws Exception
-   */
-  void setupTableWithRegionReplica(TableName tablename, int replicaCount) throws Exception {
-    HTableDescriptor desc = new HTableDescriptor(tablename);
-    desc.setRegionReplication(replicaCount);
-    HColumnDescriptor hcd = new HColumnDescriptor(Bytes.toString(FAM));
-    desc.addFamily(hcd); // If a table has no CF's it doesn't get checked
-    createTable(TEST_UTIL, desc, SPLITS);
-
-    tbl = connection.getTable(tablename, tableExecutorService);
-    List<Put> puts = new ArrayList<Put>();
-    for (byte[] row : ROWKEYS) {
-      Put p = new Put(row);
-      p.addColumn(FAM, Bytes.toBytes("val"), row);
-      puts.add(p);
-    }
-    tbl.put(puts);
-  }
-
-  /**
-   * Sets up a clean table with a mob-enabled column.
-   *
-   * @param tablename The name of the table to be created.
-   * @throws Exception
-   */
-  void setupMobTable(TableName tablename) throws Exception {
-    HTableDescriptor desc = new HTableDescriptor(tablename);
-    HColumnDescriptor hcd = new HColumnDescriptor(Bytes.toString(FAM));
-    hcd.setMobEnabled(true);
-    hcd.setMobThreshold(0);
-    desc.addFamily(hcd); // If a table has no CF's it doesn't get checked
-    createTable(TEST_UTIL, desc, SPLITS);
-
-    tbl = connection.getTable(tablename, tableExecutorService);
-    List<Put> puts = new ArrayList<Put>();
-    for (byte[] row : ROWKEYS) {
-      Put p = new Put(row);
-      p.add(FAM, Bytes.toBytes("val"), row);
-      puts.add(p);
-    }
-    tbl.put(puts);
-  }
-
-  /**
-   * Counts the number of rows to verify whether any data was lost.
-   */
-  int countRows() throws IOException {
-     Scan s = new Scan();
-     ResultScanner rs = tbl.getScanner(s);
-     int i = 0;
-     while(rs.next() !=null) {
-       i++;
-     }
-     return i;
-  }
-
-  /**
-   * Deletes the table in preparation for the next test.
-   *
-   * @param tablename
-   * @throws IOException
-   */
-  void cleanupTable(TableName tablename) throws Exception {
-    if (tbl != null) {
-      tbl.close();
-      tbl = null;
-    }
-
-    ((ClusterConnection) connection).clearRegionCache();
-    deleteTable(TEST_UTIL, tablename);
-  }
-
-  /**
-   * This creates a clean table and confirms that the table is clean.
-   */
-  @Test (timeout=180000)
-  public void testHBaseFsckClean() throws Exception {
-    assertNoErrors(doFsck(conf, false));
-    TableName table = TableName.valueOf("tableClean");
-    try {
-      HBaseFsck hbck = doFsck(conf, false);
-      assertNoErrors(hbck);
-
-      setupTable(table);
-      assertEquals(ROWKEYS.length, countRows());
-
-      // We created 1 table, should be fine
-      hbck = doFsck(conf, false);
-      assertNoErrors(hbck);
-      assertEquals(0, hbck.getOverlapGroups(table).size());
-      assertEquals(ROWKEYS.length, countRows());
-    } finally {
-      cleanupTable(table);
-    }
-  }
-
-  /**
-   * Test thread pooling in the case where there are more regions than threads
-   */
-  @Test (timeout=180000)
-  public void testHbckThreadpooling() throws Exception {
-    TableName table =
-        TableName.valueOf("tableDupeStartKey");
-    try {
-      // Create table with 4 regions
-      setupTable(table);
-
-      // limit number of threads to 1.
-      Configuration newconf = new Configuration(conf);
-      newconf.setInt("hbasefsck.numthreads", 1);
-      assertNoErrors(doFsck(newconf, false));
-
-      // We should pass without triggering a RejectedExecutionException
-    } finally {
-      cleanupTable(table);
-    }
-  }
-
-  @Test (timeout=180000)
-  public void testHbckFixOrphanTable() throws Exception {
-    TableName table = TableName.valueOf("tableInfo");
-    FileSystem fs = null;
-    Path tableinfo = null;
-    try {
-      setupTable(table);
-
-      Path hbaseTableDir = FSUtils.getTableDir(
-          FSUtils.getRootDir(conf), table);
-      fs = hbaseTableDir.getFileSystem(conf);
-      FileStatus status = FSTableDescriptors.getTableInfoPath(fs, hbaseTableDir);
-      tableinfo = status.getPath();
-      fs.rename(tableinfo, new Path("/.tableinfo"));
-
-      // hbck should report an error when .tableinfo is missing.
-      HBaseFsck hbck = doFsck(conf, false);
-      assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NO_TABLEINFO_FILE });
-
-      // fix OrphanTable with default .tableinfo (htd not yet cached on master)
-      hbck = doFsck(conf, true);
-      assertNoErrors(hbck);
-      status = null;
-      status = FSTableDescriptors.getTableInfoPath(fs, hbaseTableDir);
-      assertNotNull(status);
-
-      HTableDescriptor htd = admin.getTableDescriptor(table);
-      htd.setValue("NOT_DEFAULT", "true");
-      admin.disableTable(table);
-      admin.modifyTable(table, htd);
-      admin.enableTable(table);
-      fs.delete(status.getPath(), true);
-
-      // fix OrphanTable with cache
-      htd = admin.getTableDescriptor(table); // warms up cached htd on master
-      hbck = doFsck(conf, true);
-      assertNoErrors(hbck);
-      status = FSTableDescriptors.getTableInfoPath(fs, hbaseTableDir);
-      assertNotNull(status);
-      htd = admin.getTableDescriptor(table);
-      assertEquals(htd.getValue("NOT_DEFAULT"), "true");
-    } finally {
-      fs.rename(new Path("/.tableinfo"), tableinfo);
-      cleanupTable(table);
-    }
-  }
-
-  /**
-   * This test makes sure that two instances of hbck cannot run in parallel.
-   *
-   * @throws Exception
-   */
-  @Test (timeout=180000)
-  public void testParallelHbck() throws Exception {
-    final ExecutorService service;
-    final Future<HBaseFsck> hbck1,hbck2;
-
-    class RunHbck implements Callable<HBaseFsck>{
-      boolean fail = true;
-      @Override
-      public HBaseFsck call(){
-        Configuration c = new Configuration(conf);
-        c.setInt("hbase.hbck.lockfile.attempts", 1);
-        // HBASE-13574 found that in HADOOP-2.6 and later, the file-create call would internally retry.
-        // To avoid flakiness of the test, set a low max wait time.
-        c.setInt("hbase.hbck.lockfile.maxwaittime", 3);
-        try{
-          return doFsck(c, true); // Exclusive hbck only when fixing
-        } catch(Exception e){
-          if (e.getMessage().contains("Duplicate hbck")) {
-            fail = false;
-          }
-        }
-        // If we reach here, then an exception was caught
-        if (fail) fail();
-        return null;
-      }
-    }
-    service = Executors.newFixedThreadPool(2);
-    hbck1 = service.submit(new RunHbck());
-    hbck2 = service.submit(new RunHbck());
-    service.shutdown();
-    // wait for 15 seconds for both hbck calls to finish
-    service.awaitTermination(15, TimeUnit.SECONDS);
-    HBaseFsck h1 = hbck1.get();
-    HBaseFsck h2 = hbck2.get();
-    // Make sure only one of the calls was successful
-    assert(h1 == null || h2 == null);
-    if (h1 != null) {
-      assert(h1.getRetCode() >= 0);
-    }
-    if (h2 != null) {
-      assert(h2.getRetCode() >= 0);
-    }
-  }
-
-  /**
-   * This test makes sure that with enough retries both parallel instances
-   * of hbck will complete successfully.
-   *
-   * @throws Exception
-   */
-  @Test (timeout=180000)
-  public void testParallelWithRetriesHbck() throws Exception {
-    final ExecutorService service;
-    final Future<HBaseFsck> hbck1,hbck2;
-
-    // With the ExponentialBackoffPolicyWithLimit (starting with 200 milliseconds sleep time, and
-    // max sleep time of 5 seconds), we can retry around 15 times within 80 seconds before bailing out.
-    //
-    // Note: the reason to use 80 seconds is that in HADOOP-2.6 and later, the create file would
-    // retry up to HdfsConstants.LEASE_SOFTLIMIT_PERIOD (60 seconds).  See HBASE-13574 for more
-    // details.
-    final int timeoutInSeconds = 80;
-    final int sleepIntervalInMilliseconds = 200;
-    final int maxSleepTimeInMilliseconds = 6000;
-    final int maxRetryAttempts = 15;
-
-    class RunHbck implements Callable<HBaseFsck>{
-
-      @Override
-      public HBaseFsck call() throws Exception {
-        // Increase retry attempts to make sure the non-active hbck doesn't get starved
-        Configuration c = new Configuration(conf);
-        c.setInt("hbase.hbck.lockfile.maxwaittime", timeoutInSeconds);
-        c.setInt("hbase.hbck.lockfile.attempt.sleep.interval", sleepIntervalInMilliseconds);
-        c.setInt("hbase.hbck.lockfile.attempt.maxsleeptime", maxSleepTimeInMilliseconds);
-        c.setInt("hbase.hbck.lockfile.attempts", maxRetryAttempts);
-        return doFsck(c, false);
-      }
-    }
-
-    service = Executors.newFixedThreadPool(2);
-    hbck1 = service.submit(new RunHbck());
-    hbck2 = service.submit(new RunHbck());
-    service.shutdown();
-    // wait for some time for both hbck calls to finish
-    service.awaitTermination(timeoutInSeconds * 2, TimeUnit.SECONDS);
-    HBaseFsck h1 = hbck1.get();
-    HBaseFsck h2 = hbck2.get();
-    // Both should be successful
-    assertNotNull(h1);
-    assertNotNull(h2);
-    assert(h1.getRetCode() >= 0);
-    assert(h2.getRetCode() >= 0);
-
-  }
-
-  /**
-   * This creates and fixes a bad table with regions that have a duplicate
-   * start key
-   */
-  @Test (timeout=180000)
-  public void testDupeStartKey() throws Exception {
-    TableName table =
-        TableName.valueOf("tableDupeStartKey");
-    try {
-      setupTable(table);
-      assertNoErrors(doFsck(conf, false));
-      assertEquals(ROWKEYS.length, countRows());
-
-      // Now let's mess it up, by adding a region with a duplicate startkey
-      HRegionInfo hriDupe =
-          createRegion(tbl.getTableDescriptor(), Bytes.toBytes("A"), Bytes.toBytes("A2"));
-      TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriDupe);
-      TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
-          .waitForAssignment(hriDupe);
-      ServerName server = regionStates.getRegionServerOfRegion(hriDupe);
-      TEST_UTIL.assertRegionOnServer(hriDupe, server, REGION_ONLINE_TIMEOUT);
-
-      HBaseFsck hbck = doFsck(conf, false);
-      assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.DUPE_STARTKEYS, ERROR_CODE.DUPE_STARTKEYS });
-      assertEquals(2, hbck.getOverlapGroups(table).size());
-      assertEquals(ROWKEYS.length, countRows()); // seems like the "bigger" region won.
-
-      // fix the degenerate region.
-      doFsck(conf, true);
-
-      // check that the degenerate region is gone and no data loss
-      HBaseFsck hbck2 = doFsck(conf,false);
-      assertNoErrors(hbck2);
-      assertEquals(0, hbck2.getOverlapGroups(table).size());
-      assertEquals(ROWKEYS.length, countRows());
-    } finally {
-      cleanupTable(table);
-    }
-  }
-
-  /*
-   * This creates a table with region_replica > 1 and verifies hbck runs
-   * successfully
-   */
-  @Test (timeout=180000)
-  public void testHbckWithRegionReplica() throws Exception {
-    TableName table =
-        TableName.valueOf("testHbckWithRegionReplica");
-    try {
-      setupTableWithRegionReplica(table, 2);
-      admin.flush(table);
-      assertNoErrors(doFsck(conf, false));
-    } finally {
-      cleanupTable(table);
-    }
-  }
-
-  @Test (timeout=180000)
-  public void testHbckWithFewerReplica() throws Exception {
-    TableName table =
-        TableName.valueOf("testHbckWithFewerReplica");
-    try {
-      setupTableWithRegionReplica(table, 2);
-      admin.flush(table);
-      assertNoErrors(doFsck(conf, false));
-      assertEquals(ROWKEYS.length, countRows());
-      deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"), Bytes.toBytes("C"), true,
-          false, false, false, 1); // unassign one replica
-      // check that problem exists
-      HBaseFsck hbck = doFsck(conf, false);
-      assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NOT_DEPLOYED });
-      // fix the problem
-      hbck = doFsck(conf, true);
-      // run hbck again to make sure we don't see any errors
-      hbck = doFsck(conf, false);
-      assertErrors(hbck, new ERROR_CODE[] {});
-    } finally {
-      cleanupTable(table);
-    }
-  }
-
-  @Test (timeout=180000)
-  public void testHbckWithExcessReplica() throws Exception {
-    TableName table =
-        TableName.valueOf("testHbckWithExcessReplica");
-    try {
-      setupTableWithRegionReplica(table, 2);
-      admin.flush(table);
-      assertNoErrors(doFsck(conf, false));
-      assertEquals(ROWKEYS.length, countRows());
-      // the next few lines inject a location in meta for a replica, and then
-      // ask the master to assign the replica (the meta needs to be injected
-      // for the master to treat the assignment request as valid; the master
-      // checks that the region is valid either from its memory or from meta)
-      Table meta = connection.getTable(TableName.META_TABLE_NAME, tableExecutorService);
-      List<HRegionInfo> regions = admin.getTableRegions(table);
-      byte[] startKey = Bytes.toBytes("B");
-      byte[] endKey = Bytes.toBytes("C");
-      byte[] metaKey = null;
-      HRegionInfo newHri = null;
-      for (HRegionInfo h : regions) {
-        if (Bytes.compareTo(h.getStartKey(), startKey) == 0  &&
-            Bytes.compareTo(h.getEndKey(), endKey) == 0 &&
-            h.getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID) {
-          metaKey = h.getRegionName();
-          //create a hri with replicaId as 2 (since we already have replicas with replicaid 0 and 1)
-          newHri = RegionReplicaUtil.getRegionInfoForReplica(h, 2);
-          break;
-        }
-      }
-      Put put = new Put(metaKey);
-      Collection<ServerName> var = admin.getClusterStatus().getServers();
-      ServerName sn = var.toArray(new ServerName[var.size()])[0];
-      //add a location with replicaId as 2 (since we already have replicas with replicaid 0 and 1)
-      MetaTableAccessor.addLocation(put, sn, sn.getStartcode(), -1, 2);
-      meta.put(put);
-      // assign the new replica
-      HBaseFsckRepair.fixUnassigned(admin, newHri);
-      HBaseFsckRepair.waitUntilAssigned(admin, newHri);
-      // now reset the meta row to its original value
-      Delete delete = new Delete(metaKey);
-      delete.addColumns(HConstants.CATALOG_FAMILY, MetaTableAccessor.getServerColumn(2));
-      delete.addColumns(HConstants.CATALOG_FAMILY, MetaTableAccessor.getStartCodeColumn(2));
-      delete.addColumns(HConstants.CATALOG_FAMILY, MetaTableAccessor.getSeqNumColumn(2));
-      meta.delete(delete);
-      meta.close();
-      // check that problem exists
-      HBaseFsck hbck = doFsck(conf, false);
-      assertErrors(hbck, new ERROR_CODE[]{ERROR_CODE.NOT_IN_META});
-      // fix the problem
-      hbck = doFsck(conf, true);
-      // run hbck again to make sure we don't see any errors
-      hbck = doFsck(conf, false);
-      assertErrors(hbck, new ERROR_CODE[]{});
-    } finally {
-      cleanupTable(table);
-    }
-  }
-  /**
-   * Get region info from local cluster.
-   */
-  Map<ServerName, List<String>> getDeployedHRIs(final HBaseAdmin admin) throws IOException {
-    ClusterStatus status = admin.getClusterStatus();
-    Collection<ServerName> regionServers = status.getServers();
-    Map<ServerName, List<String>> mm =
-        new HashMap<ServerName, List<String>>();
-    for (ServerName hsi : regionServers) {
-      AdminProtos.AdminService.BlockingInterface server = ((HConnection) connection).getAdmin(hsi);
-
-      // list all online regions from this region server
-      List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(server);
-      List<String> regionNames = new ArrayList<String>();
-      for (HRegionInfo hri : regions) {
-        regionNames.add(hri.getRegionNameAsString());
-      }
-      mm.put(hsi, regionNames);
-    }
-    return mm;
-  }
-
-  /**
-   * Returns the HSI a region info is on.
-   */
-  ServerName findDeployedHSI(Map<ServerName, List<String>> mm, HRegionInfo hri) {
-    for (Map.Entry<ServerName,List <String>> e : mm.entrySet()) {
-      if (e.getValue().contains(hri.getRegionNameAsString())) {
-        return e.getKey();
-      }
-    }
-    return null;
-  }
-
-  /**
-   * This creates and fixes a bad table with regions that have a duplicate
-   * start key
-   */
-  @Test (timeout=180000)
-  public void testDupeRegion() throws Exception {
-    TableName table =
-        TableName.valueOf("tableDupeRegion");
-    try {
-      setupTable(table);
-      assertNoErrors(doFsck(conf, false));
-      assertEquals(ROWKEYS.length, countRows());
-
-      // Now let's mess it up, by adding a region with a duplicate startkey
-      HRegionInfo hriDupe =
-          createRegion(tbl.getTableDescriptor(), Bytes.toBytes("A"), Bytes.toBytes("B"));
-
-      TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriDupe);
-      TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
-          .waitForAssignment(hriDupe);
-      ServerName server = regionStates.getRegionServerOfRegion(hriDupe);
-      TEST_UTIL.assertRegionOnServer(hriDupe, server, REGION_ONLINE_TIMEOUT);
-
-      // Yikes! The assignment manager can't tell the difference between two
-      // different regions with the same start/endkeys since it doesn't
-      // differentiate on ts/regionId!  We actually need to recheck
-      // deployments!
-      while (findDeployedHSI(getDeployedHRIs((HBaseAdmin) admin), hriDupe) == null) {
-        Thread.sleep(250);
-      }
-
-      LOG.debug("Finished assignment of dupe region");
-
-      // TODO why is dupe region different from dupe start keys?
-      HBaseFsck hbck = doFsck(conf, false);
-      assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.DUPE_STARTKEYS, ERROR_CODE.DUPE_STARTKEYS });
-      assertEquals(2, hbck.getOverlapGroups(table).size());
-      assertEquals(ROWKEYS.length, countRows()); // seems like the "bigger" region won.
-
-      // fix the degenerate region.
-      doFsck(conf, true);
-
-      // check that the degenerate region is gone and no data loss
-      HBaseFsck hbck2 = doFsck(conf,false);
-      assertNoErrors(hbck2);
-      assertEquals(0, hbck2.getOverlapGroups(table).size());
-      assertEquals(ROWKEYS.length, countRows());
-    } finally {
-      cleanupTable(table);
-    }
-  }
-
-  /**
-   * This creates and fixes a bad table with regions that have startkey == endkey
-   */
-  @Test (timeout=180000)
-  public void testDegenerateRegions() throws Exception {
-    TableName table = TableName.valueOf("tableDegenerateRegions");
-    try {
-      setupTable(table);
-      assertNoErrors(doFsck(conf, false));
-      assertEquals(ROWKEYS.length, countRows());
-
-      // Now let's mess it up by adding a degenerate region whose startkey equals its endkey
-      HRegionInfo hriDupe =
-          createRegion(tbl.getTableDescriptor(), Bytes.toBytes("B"), Bytes.toBytes("B"));
-      TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriDupe);
-      TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
-          .waitForAssignment(hriDupe);
-      ServerName server = regionStates.getRegionServerOfRegion(hriDupe);
-      TEST_UTIL.assertRegionOnServer(hriDupe, server, REGION_ONLINE_TIMEOUT);
-
-      HBaseFsck hbck = doFsck(conf,false);
-      assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.DEGENERATE_REGION, ERROR_CODE.DUPE_STARTKEYS,
-          ERROR_CODE.DUPE_STARTKEYS });
-      assertEquals(2, hbck.getOverlapGroups(table).size());
-      assertEquals(ROWKEYS.length, countRows());
-
-      // fix the degenerate region.
-      doFsck(conf, true);
-
-      // check that the degenerate region is gone and no data loss
-      HBaseFsck hbck2 = doFsck(conf,false);
-      assertNoErrors(hbck2);
-      assertEquals(0, hbck2.getOverlapGroups(table).size());
-      assertEquals(ROWKEYS.length, countRows());
-    } finally {
-      cleanupTable(table);
-    }
-  }
-
-  /**
-   * This creates and fixes a bad table where a region is completely contained
-   * by another region.
-   */
-  @Test (timeout=180000)
-  public void testContainedRegionOverlap() throws Exception {
-    TableName table =
-        TableName.valueOf("tableContainedRegionOverlap");
-    try {
-      setupTable(table);
-      assertEquals(ROWKEYS.length, countRows());
-
-      // Mess it up by creating an overlap in the metadata
-      HRegionInfo hriOverlap =
-          createRegion(tbl.getTableDescriptor(), Bytes.toBytes("A2"), Bytes.toBytes("B"));
-      TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriOverlap);
-      TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
-          .waitForAssignment(hriOverlap);
-      ServerName server = regionStates.getRegionServerOfRegion(hriOverlap);
-      TEST_UTIL.assertRegionOnServer(hriOverlap, server, REGION_ONLINE_TIMEOUT);
-
-      HBaseFsck hbck = doFsck(conf, false);
-      assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.OVERLAP_IN_REGION_CHAIN });
-      assertEquals(2, hbck.getOverlapGroups(table).size());
-      assertEquals(ROWKEYS.length, countRows());
-
-      // fix the problem.
-      doFsck(conf, true);
-
-      // verify that overlaps are fixed
-      HBaseFsck hbck2 = doFsck(conf,false);
-      assertNoErrors(hbck2);
-      assertEquals(0, hbck2.getOverlapGroups(table).size());
-      assertEquals(ROWKEYS.length, countRows());
-    } finally {
-      cleanupTable(table);
-    }
-  }
-
-  /**
-   * This creates and fixes a bad table with an overlap group of
-   * 3 regions. HBaseFsck.maxMerge is set to 2 to trigger sidelining of an
-   * overlapped region. The meta data is messed with so that
-   * closeRegion/offlineRegion throw exceptions.
-   */
-  @Test (timeout=180000)
-  public void testSidelineOverlapRegion() throws Exception {
-    TableName table =
-        TableName.valueOf("testSidelineOverlapRegion");
-    try {
-      setupTable(table);
-      assertEquals(ROWKEYS.length, countRows());
-
-      // Mess it up by creating an overlap
-      MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
-      HMaster master = cluster.getMaster();
-      HRegionInfo hriOverlap1 =
-          createRegion(tbl.getTableDescriptor(), Bytes.toBytes("A"), Bytes.toBytes("AB"));
-      master.assignRegion(hriOverlap1);
-      master.getAssignmentManager().waitForAssignment(hriOverlap1);
-      HRegionInfo hriOverlap2 =
-          createRegion(tbl.getTableDescriptor(), Bytes.toBytes("AB"), Bytes.toBytes("B"));
-      master.assignRegion(hriOverlap2);
-      master.getAssignmentManager().waitForAssignment(hriOverlap2);
-
-      HBaseFsck hbck = doFsck(conf, false);
-      assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.DUPE_STARTKEYS,
-        ERROR_CODE.DUPE_STARTKEYS, ERROR_CODE.OVERLAP_IN_REGION_CHAIN});
-      assertEquals(3, hbck.getOverlapGroups(table).size());
-      assertEquals(ROWKEYS.length, countRows());
-
-      // mess around the overlapped regions, to trigger NotServingRegionException
-      Multimap<byte[], HbckInfo> overlapGroups = hbck.getOverlapGroups(table);
-      ServerName serverName = null;
-      byte[] regionName = null;
-      for (HbckInfo hbi: overlapGroups.values()) {
-        if ("A".equals(Bytes.toString(hbi.getStartKey()))
-            && "B".equals(Bytes.toString(hbi.getEndKey()))) {
-          regionName = hbi.getRegionName();
-
-          // get an RS not serving the region to force bad assignment info in to META.
-          int k = cluster.getServerWith(regionName);
-          for (int i = 0; i < 3; i++) {
-            if (i != k) {
-              HRegionServer rs = cluster.getRegionServer(i);
-              serverName = rs.getServerName();
-              break;
-            }
-          }
-
-          HBaseFsckRepair.closeRegionSilentlyAndWait((HConnection) connection,
-              cluster.getRegionServer(k).getServerName(), hbi.getHdfsHRI());
-          admin.offline(regionName);
-          break;
-        }
-      }
-
-      assertNotNull(regionName);
-      assertNotNull(serverName);
-      try (Table meta = connection.getTable(TableName.META_TABLE_NAME, tableExecutorService)) {
-        Put put = new Put(regionName);
-        put.add(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER,
-            Bytes.toBytes(serverName.getHostAndPort()));
-        meta.put(put);
-      }
-
-      // fix the problem.
-      HBaseFsck fsck = new HBaseFsck(conf, hbfsckExecutorService);
-      fsck.connect();
-      HBaseFsck.setDisplayFullReport(); // i.e. -details
-      fsck.setTimeLag(0);
-      fsck.setFixAssignments(true);
-      fsck.setFixMeta(true);
-      fsck.setFixHdfsHoles(true);
-      fsck.setFixHdfsOverlaps(true);
-      fsck.setFixHdfsOrphans(true);
-      fsck.setFixVersionFile(true);
-      fsck.setSidelineBigOverlaps(true);
-      fsck.setMaxMerge(2);
-      fsck.onlineHbck();
-      fsck.close();
-
-      // verify that overlaps are fixed, and there are fewer rows
-      // since one region is sidelined.
-      HBaseFsck hbck2 = doFsck(conf,false);
-      assertNoErrors(hbck2);
-      assertEquals(0, hbck2.getOverlapGroups(table).size());
-      assertTrue(ROWKEYS.length > countRows());
-    } finally {
-      cleanupTable(table);
-    }
-  }
-
-  /**
-   * This creates and fixes a bad table where a region is completely contained
-   * by another region, and there is a hole (sort of like a bad split)
-   */
-  @Test (timeout=180000)
-  public void testOverlapAndOrphan() throws Exception {
-    TableName table =
-        TableName.valueOf("tableOverlapAndOrphan");
-    try {
-      setupTable(table);
-      assertEquals(ROWKEYS.length, countRows());
-
-      // Mess it up by creating an overlap in the metadata
-      admin.disableTable(table);
-      deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"), Bytes.toBytes("B"), true,
-          true, false, true, HRegionInfo.DEFAULT_REPLICA_ID);
-      admin.enableTable(table);
-
-      HRegionInfo hriOverlap =
-          createRegion(tbl.getTableDescriptor(), Bytes.toBytes("A2"), Bytes.toBytes("B"));
-      TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriOverlap);
-      TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
-          .waitForAssignment(hriOverlap);
-      ServerName server = regionStates.getRegionServerOfRegion(hriOverlap);
-      TEST_UTIL.assertRegionOnServer(hriOverlap, server, REGION_ONLINE_TIMEOUT);
-
-      HBaseFsck hbck = doFsck(conf, false);
-      assertErrors(hbck,
-          new ERROR_CODE[] { ERROR_CODE.ORPHAN_HDFS_REGION, ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
-              ERROR_CODE.HOLE_IN_REGION_CHAIN });
-
-      // fix the problem.
-      doFsck(conf, true);
-
-      // verify that overlaps are fixed
-      HBaseFsck hbck2 = doFsck(conf,false);
-      assertNoErrors(hbck2);
-      assertEquals(0, hbck2.getOverlapGroups(table).size());
-      assertEquals(ROWKEYS.length, countRows());
-    } finally {
-      cleanupTable(table);
-    }
-  }
-
-  /**
-   * This creates and fixes a bad table where a region overlaps two regions --
-   * its start key is contained in another region and its end key is contained in
-   * yet another region.
-   */
-  @Test (timeout=180000)
-  public void testCoveredStartKey() throws Exception {
-    TableName table =
-        TableName.valueOf("tableCoveredStartKey");
-    try {
-      setupTable(table);
-      assertEquals(ROWKEYS.length, countRows());
-
-      // Mess it up by creating an overlap in the metadata
-      HRegionInfo hriOverlap =
-          createRegion(tbl.getTableDescriptor(), Bytes.toBytes("A2"), Bytes.toBytes("B2"));
-      TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriOverlap);
-      TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
-          .waitForAssignment(hriOverlap);
-      ServerName server = regionStates.getRegionServerOfRegion(hriOverlap);
-      TEST_UTIL.assertRegionOnServer(hriOverlap, server, REGION_ONLINE_TIMEOUT);
-
-      HBaseFsck hbck = doFsck(conf, false);
-      assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.OVERLAP_IN_REGION_CHAIN,
-          ERROR_CODE.OVERLAP_IN_REGION_CHAIN });
-      assertEquals(3, hbck.getOverlapGroups(table).size());
-      assertEquals(ROWKEYS.length, countRows());
-
-      // fix the problem.
-      doFsck(conf, true);
-
-      // verify that overlaps are fixed
-      HBaseFsck hbck2 = doFsck(conf, false);
-      assertErrors(hbck2, new ERROR_CODE[0]);
-      assertEquals(0, hbck2.getOverlapGroups(table).size());
-      assertEquals(ROWKEYS.length, countRows());
-    } finally {
-      cleanupTable(table);
-    }
-  }
-
-  /**
-   * This creates and fixes a bad table with a missing region -- hole in meta
-   * and data missing in the fs.
-   */
-  @Test (timeout=180000)
-  public void testRegionHole() throws Exception {
-    TableName table =
-        TableName.valueOf("tableRegionHole");
-    try {
-      setupTable(table);
-      assertEquals(ROWKEYS.length, countRows());
-
-      // Mess it up by leaving a hole in the assignment, meta, and hdfs data
-      admin.disableTable(table);
-      deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"), Bytes.toBytes("C"), true,
-          true, true);
-      admin.enableTable(table);
-
-      HBaseFsck hbck = doFsck(conf, false);
-      assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.HOLE_IN_REGION_CHAIN });
-      // holes are separate from overlap groups
-      assertEquals(0, hbck.getOverlapGroups(table).size());
-
-      // fix hole
-      doFsck(conf, true);
-
-      // check that hole fixed
-      assertNoErrors(doFsck(conf,false));
-      assertEquals(ROWKEYS.length - 2, countRows()); // lost a region so lost a row
-    } finally {
-      cleanupTable(table);
-    }
-  }
-
-  /**
-   * This creates and fixes a bad table with a missing region -- hole in meta
-   * and data present but .regioninfo missing (an orphan hdfs region) in the fs.
-   */
-  @Test (timeout=180000)
-  public void testHDFSRegioninfoMissing() throws Exception {
-    TableName table = TableName.valueOf("tableHDFSRegioninfoMissing");
-    try {
-      setupTable(table);
-      assertEquals(ROWKEYS.length, countRows());
-
-      // Mess it up by leaving a hole in the meta data
-      admin.disableTable(table);
-      deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"), Bytes.toBytes("C"), true,
-          true, false, true, HRegionInfo.DEFAULT_REPLICA_ID);
-      admin.enableTable(table);
-
-      HBaseFsck hbck = doFsck(conf, false);
-      assertErrors(hbck,
-          new ERROR_CODE[] { ERROR_CODE.ORPHAN_HDFS_REGION, ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
-              ERROR_CODE.HOLE_IN_REGION_CHAIN });
-      // holes are separate from overlap groups
-      assertEquals(0, hbck.getOverlapGroups(table).size());
-
-      // fix hole
-      doFsck(conf, true);
-
-      // check that hole fixed
-      assertNoErrors(doFsck(conf, false));
-      assertEquals(ROWKEYS.length, countRows());
-    } finally {
-      cleanupTable(table);
-    }
-  }
-
-  /**
-   * This creates and fixes a bad table with a region that is missing meta and
-   * not assigned to a region server.
-   */
-  @Test (timeout=180000)
-  public void testNotInMetaOrDeployedHole() throws Exception {
-    TableName table =
-        TableName.valueOf("tableNotInMetaOrDeployedHole");
-    try {
-      setupTable(table);
-      assertEquals(ROWKEYS.length, countRows());
-
-      // Mess it up by leaving a hole in the meta data
-      admin.disableTable(table);
-      deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"), Bytes.toBytes("C"), true,
-          true, false); // don't rm from fs
-      admin.enableTable(table);
-
-      HBaseFsck hbck = doFsck(conf, false);
-      assertErrors(hbck,
-          new ERROR_CODE[] { ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN });
-      // holes are separate from overlap groups
-      assertEquals(0, hbck.getOverlapGroups(table).size());
-
-      // fix hole
-      assertErrors(doFsck(conf, true),
-          new ERROR_CODE[] { ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN });
-
-      // check that hole fixed
-      assertNoErrors(doFsck(conf, false));
-      assertEquals(ROWKEYS.length, countRows());
-    } finally {
-      cleanupTable(table);
-    }
-  }
-
-  @Test (timeout=180000)
-  public void testCleanUpDaughtersNotInMetaAfterFailedSplit() throws Exception {
-    TableName table = TableName.valueOf("testCleanUpDaughtersNotInMetaAfterFailedSplit");
-    MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
-    try {
-      HTableDescriptor desc = new HTableDescriptor(table);
-      desc.addFamily(new HColumnDescriptor(Bytes.toBytes("f")));
-      createTable(TEST_UTIL, desc, null);
-
-      tbl = connection.getTable(desc.getTableName());
-      for (int i = 0; i < 5; i++) {
-        Put p1 = new Put(("r" + i).getBytes());
-        p1.add(Bytes.toBytes("f"), "q1".getBytes(), "v".getBytes());
-        tbl.put(p1);
-      }
-      admin.flush(desc.getTableName());
-      List<HRegion> regions = cluster.getRegions(desc.getTableName());
-      int serverWith = cluster.getServerWith(regions.get(0).getRegionInfo().getRegionName());
-      HRegionServer regionServer = cluster.getRegionServer(serverWith);
-      cluster.getServerWith(regions.get(0).getRegionInfo().getRegionName());
-      SplitTransactionImpl st = (SplitTransactionImpl)
-        new SplitTransactionFactory(TEST_UTIL.getConfiguration())
-          .create(regions.get(0), Bytes.toBytes("r3"));
-      st.prepare();
-      st.stepsBeforePONR(regionServer, regionServer, false);
-      AssignmentManager am = cluster.getMaster().getAssignmentManager();
-      Map<String, RegionState> regionsInTransition = am.getRegionStates().getRegionsInTransition();
-      for (RegionState state : regionsInTransition.values()) {
-        am.regionOffline(state.getRegion());
-      }
-      Map<HRegionInfo, ServerName> regionsMap = new HashMap<HRegionInfo, ServerName>();
-      regionsMap.put(regions.get(0).getRegionInfo(), regionServer.getServerName());
-      am.assign(regionsMap);
-      am.waitForAssignment(regions.get(0).getRegionInfo());
-      HBaseFsck hbck = doFsck(conf, false);
-      assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
-          ERROR_CODE.NOT_IN_META_OR_DEPLOYED });
-      // holes are separate from overlap groups
-      assertEquals(0, hbck.getOverlapGroups(table).size());
-
-      // fix hole
-      assertErrors(
-          doFsck(conf, false, true, false, false, false, false, false, false, false, false, null),
-          new ERROR_CODE[] { ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
-              ERROR_CODE.NOT_IN_META_OR_DEPLOYED });
-
-      // check that hole fixed
-      assertNoErrors(doFsck(conf, false));
-      assertEquals(5, countRows());
-    } finally {
-      if (tbl != null) {
-        tbl.close();
-        tbl = null;
-      }
-      cleanupTable(table);
-    }
-  }
-
-  /**
-   * This creates and fixes a bad table with a hole in meta.
-   */
-  @Test (timeout=180000)
-  public void testNotInMetaHole() throws Exception {
-    TableName table =
-        TableName.valueOf("tableNotInMetaHole");
-    try {
-      setupTable(table);
-      assertEquals(ROWKEYS.length, countRows());
-
-      // Mess it up by leaving a hole in the meta data
-      admin.disableTable(table);
-      deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"), Bytes.toBytes("C"), false,
-          true, false); // don't rm from fs
-      admin.enableTable(table);
-
-      HBaseFsck hbck = doFsck(conf, false);
-      assertErrors(hbck,
-          new ERROR_CODE[] { ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN });
-      // holes are separate from overlap groups
-      assertEquals(0, hbck.getOverlapGroups(table).size());
-
-      // fix hole
-      assertErrors(doFsck(conf, true),
-          new ERROR_CODE[] { ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN });
-
-      // check that hole fixed
-      assertNoErrors(doFsck(conf, false));
-      assertEquals(ROWKEYS.length, countRows());
-    } finally {
-      cleanupTable(table);
-    }
-  }
-
-  /**
-   * This creates and fixes a bad table with a region that is in meta but has
-   * no deployment or data in hdfs.
-   */
-  @Test (timeout=180000)
-  public void testNotInHdfs() throws Exception {
-    TableName table =
-        TableName.valueOf("tableNotInHdfs");
-    try {
-      setupTable(table);
-      assertEquals(ROWKEYS.length, countRows());
-
-      // make sure data in regions, if in wal only there is no data loss
-      admin.flush(table);
-
-      // Mess it up by leaving a hole in the hdfs data
-      deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"), Bytes.toBytes("C"), false,
-          false, true); // don't rm meta
-
-      HBaseFsck hbck = doFsck(conf, false);
-      assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.NOT_IN_HDFS});
-      // holes are separate from overlap groups
-      assertEquals(0, hbck.getOverlapGroups(table).size());
-
-      // fix hole
-      doFsck(conf, true);
-
-      // check that hole fixed
-      assertNoErrors(doFsck(conf,false));
-      assertEquals(ROWKEYS.length - 2, countRows());
-    } finally {
-      cleanupTable(table);
-    }
-  }
-
-  /**
-   * This creates and fixes a bad table with a region that is in meta but has
-   * no deployment or data in hdfs. The table has region_replication set to 2.
-   */
-  @Test (timeout=180000)
-  public void testNotInHdfsWithReplicas() throws Exception {
-    TableName table =
-        TableName.valueOf("tableNotInHdfs");
-    try {
-      HRegionInfo[] oldHris = new HRegionInfo[2];
-      setupTableWithRegionReplica(table, 2);
-      assertEquals(ROWKEYS.length, countRows());
-      NavigableMap<HRegionInfo, ServerName> map =
-          MetaTableAccessor.allTableRegions(TEST_UTIL.getConnection(),
-              tbl.getName());
-      int i = 0;
-      // store the HRIs of the regions we will mess up
-      for (Map.Entry<HRegionInfo, ServerName> m : map.entrySet()) {
-        if (m.getKey().getStartKey().length > 0 &&
-            m.getKey().getStartKey()[0] == Bytes.toBytes("B")[0]) {
-          LOG.debug("Initially server hosting " + m.getKey() + " is " + m.getValue());
-          oldHris[i++] = m.getKey();
-        }
-      }
-      // make sure data in regions
-      admin.flush(table);
-
-      // Mess it up by leaving a hole in the hdfs data
-      deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"), Bytes.toBytes("C"), false,
-          false, true); // don't rm meta
-
-      HBaseFsck hbck = doFsck(conf, false);
-      assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NOT_IN_HDFS });
-
-      // fix hole
-      doFsck(conf, true);
-
-      // check that hole fixed
-      assertNoErrors(doFsck(conf, false));
-      assertEquals(ROWKEYS.length - 2, countRows());
-
-      // the following code checks whether the old primary/secondary has
-      // been unassigned and the new primary/secondary has been assigned
-      i = 0;
-      HRegionInfo[] newHris = new HRegionInfo[2];
-      // get all table's regions from meta
-      map = MetaTableAccessor.allTableRegions(TEST_UTIL.getConnection(), tbl.getName());
-      // get the HRIs of the new regions (hbck created new regions for fixing the hdfs mess-up)
-      for (Map.Entry<HRegionInfo, ServerName> m : map.entrySet()) {
-        if (m.getKey().getStartKey().length > 0 &&
-            m.getKey().getStartKey()[0] == Bytes.toBytes("B")[0]) {
-          newHris[i++] = m.getKey();
-        }
-      }
-      // get all the online regions in the regionservers
-      Collection<ServerName> servers = admin.getClusterStatus().getServers();
-      Set<HRegionInfo> onlineRegions = new HashSet<HRegionInfo>();
-      for (ServerName s : servers) {
-        List<HRegionInfo> list = admin.getOnlineRegions(s);
-        onlineRegions.addAll(list);
-      }
-      // the new HRIs must be a subset of the online regions
-      assertTrue(onlineRegions.containsAll(Arrays.asList(newHris)));
-      // the old HRIs must not be part of the set (removeAll would return false if
-      // the set didn't change)
-      assertFalse(onlineRegions.removeAll(Arrays.asList(oldHris)));
-    } finally {
-      cleanupTable(table);
-      admin.close();
-    }
-  }
-
-
-  /**
-   * This creates entries in hbase:meta with no hdfs data.  This should cleanly
-   * remove the table.
-   */
-  @Test (timeout=180000)
-  public void testNoHdfsTable() throws Exception {
-    TableName table = TableName.valueOf("NoHdfsTable");
-    setupTable(table);
-    assertEquals(ROWKEYS.length, countRows());
-
-    // make sure data in regions, if in wal only there is no data loss
-    admin.flush(table);
-
-    // Mess it up by deleting hdfs dirs
-    deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes(""),
-        Bytes.toBytes("A"), false, false, true); // don't rm meta
-    deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"),
-        Bytes.toBytes("B"), false, false, true); // don't rm meta
-    deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
-        Bytes.toBytes("C"), false, false, true); // don't rm meta
-    deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("C"),
-        Bytes.toBytes(""), false, false, true); // don't rm meta
-
-    // also remove the table directory in hdfs
-    deleteTableDir(table);
-
-    HBaseFsck hbck = doFsck(conf, false);
-    assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.NOT_IN_HDFS,
-        ERROR_CODE.NOT_IN_HDFS, ERROR_CODE.NOT_IN_HDFS,
-        ERROR_CODE.NOT_IN_HDFS, ERROR_CODE.ORPHAN_TABLE_STATE, });
-    // holes are separate from overlap groups
-    assertEquals(0, hbck.getOverlapGroups(table).size());
-
-    // fix hole
-    doFsck(conf, true); // detect dangling regions and remove those
-
-    // check that hole fixed
-    assertNoErrors(doFsck(conf,false));
-    assertFalse("Table " + table + " should have been deleted", admin.tableExists(table));
-  }
-
-  public void deleteTableDir(TableName table) throws IOException {
-    Path rootDir = FSUtils.getRootDir(conf);
-    FileSystem fs = rootDir.getFileSystem(conf);
-    Path p = FSUtils.getTableDir(rootDir, table);
-    HBaseFsck.debugLsr(conf, p);
-    boolean success = fs.delete(p, true);
-    LOG.info("Deleted " + p + " sucessfully? " + success);
-  }
-
-  /**
-   * When the hbase.version file is missing, hbck should detect and fix the fault.
-   */
-  @Test (timeout=180000)
-  public void testNoVersionFile() throws Exception {
-    // delete the hbase.version file
-    Path rootDir = FSUtils.getRootDir(conf);
-    FileSystem fs = rootDir.getFileSystem(conf);
-    Path versionFile = new Path(rootDir, HConstants.VERSION_FILE_NAME);
-    fs.delete(versionFile, true);
-
-    // test
-    HBaseFsck hbck = doFsck(conf, false);
-    assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NO_VERSION_FILE });
-    // fix hbase.version missing
-    doFsck(conf, true);
-
-    // no version file fixed
-    assertNoErrors(doFsck(conf, false));
-  }
-
-  /**
-   * When the table state entry is missing from hbase:meta, hbck should detect and fix the fault.
-   */
-  @Test (timeout=180000)
-  public void testNoTableState() throws Exception {
-    TableName table =
-        TableName.valueOf("testNoTableState");
-    try {
-      setupTable(table);
-      // make sure data in regions, if in wal only there is no data loss
-      admin.flush(table);
-
-      MetaTableAccessor.deleteTableState(TEST_UTIL.getConnection(), table);
-
-      // test
-      HBaseFsck hbck = doFsck(conf, false);
-      assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NO_TABLE_STATE });
-      // fix table state missing
-      doFsck(conf, true);
-
-      assertNoErrors(doFsck(conf, false));
-      assertTrue(TEST_UTIL.getHBaseAdmin().isTableEnabled(table));
-    } finally {
-      cleanupTable(table);
-    }
-  }
-
-  /**
-   * The region is not deployed when the table is disabled.
-   */
-  @Test (timeout=180000)
-  public void testRegionShouldNotBeDeployed() throws Exception {
-    TableName table =
-        TableName.valueOf("tableRegionShouldNotBeDeployed");
-    try {
-      LOG.info("Starting testRegionShouldNotBeDeployed.");
-      MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
-      assertTrue(cluster.waitForActiveAndReadyMaster());
-
-
-      byte[][] SPLIT_KEYS = new byte[][] { new byte[0], Bytes.toBytes("aaa"),
-          Bytes.toBytes("bbb"), Bytes.toBytes("ccc"), Bytes.toBytes("ddd") };
-      HTableDescriptor htdDisabled = new HTableDescriptor(table);
-      htdDisabled.addFamily(new HColumnDescriptor(FAM));
-
-      // Write the .tableinfo
-      FSTableDescriptors fstd = new FSTableDescriptors(conf);
-      fstd.createTableDescriptor(htdDisabled);
-      List<HRegionInfo> disabledRegions =
-          TEST_UTIL.createMultiRegionsInMeta(conf, htdDisabled, SPLIT_KEYS);
-
-      // Let's just assign everything to first RS
-      HRegionServer hrs = cluster.getRegionServer(0);
-
-      // Create region files.
-      admin.disableTable(table);
-      admin.enableTable(table);
-
-      // Disable the table and close its regions
-      admin.disableTable(table);
-      HRegionInfo region = disabledRegions.remove(0);
-      byte[] regionName = region.getRegionName();
-
-      // The region should not be assigned currently
-      assertTrue(cluster.getServerWith(regionName) == -1);
-
-      // Directly open a region on a region server.
-      // If going through AM/ZK, the region won't be open.
-      // Even it is opened, AM will close it which causes
-      // flakiness of this test.
-      HRegion r = HRegion.openHRegion(
-        region, htdDisabled, hrs.getWAL(region), conf);
-      hrs.addToOnlineRegions(r);
-
-      HBaseFsck hbck = doFsck(conf, false);
-      assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.SHOULD_NOT_BE_DEPLOYED });
-
-      // fix this fault
-      doFsck(conf, true);
-
-      // check result
-      assertNoErrors(doFsck(conf, false));
-    } finally {
-      admin.enableTable(table);
-      cleanupTable(table);
-    }
-  }
-
-  /**
-   * This creates two tables, messes up both of them, and fixes them one by one.
-   */
-  @Test (timeout=180000)
-  public void testFixByTable() throws Exception {
-    TableName table1 =
-        TableName.valueOf("testFixByTable1");
-    TableName table2 =
-        TableName.valueOf("testFixByTable2");
-    try {
-      setupTable(table1);
-      // make sure data in regions, if in wal only there is no data loss
-      admin.flush(table1);
-      // Mess them up by leaving a hole in the hdfs data
-      deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
-        Bytes.toBytes("C"), false, false, true); // don't rm meta
-
-      setupTable(table2);
-      // make sure data in regions, if in wal only there is no data loss
-      admin.flush(table2);
-      // Mess them up by leaving a hole in the hdfs data
-      deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"), Bytes.toBytes("C"), false,
-          false, true); // don't rm meta
-
-      HBaseFsck hbck = doFsck(conf, false);
-      assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NOT_IN_HDFS, ERROR_CODE.NOT_IN_HDFS });
-
-      // fix hole in table 1
-      doFsck(conf, true, table1);
-      // check that hole in table 1 fixed
-      assertNoErrors(doFsck(conf, false, table1));
-      // check that hole in table 2 still there
-      assertErrors(doFsck(conf, false, table2), new ERROR_CODE[] { ERROR_CODE.NOT_IN_HDFS });
-
-      // fix hole in table 2
-      doFsck(conf, true, table2);
-      // check that hole in both tables fixed
-      assertNoErrors(doFsck(conf, false));
-      assertEquals(ROWKEYS.length - 2, countRows());
-    } finally {
-      cleanupTable(table1);
-      cleanupTable(table2);
-    }
-  }
-  /**
-   * A split parent in meta, in hdfs, and not deployed
-   */
-  @Test (timeout=180000)
-  public void testLingeringSplitParent() throws Exception {
-    TableName table =
-        TableName.valueOf("testLingeringSplitParent");
-    Table meta = null;
-    try {
-      setupTable(table);
-      assertEquals(ROWKEYS.length, countRows());
-
-      // make sure data in regions, if in wal only there is no data loss
-      admin.flush(table);
-
-      HRegionLocation location;
-      try(RegionLocator rl = connection.getRegionLocator(tbl.getName())) {
-        location = rl.getRegionLocation(Bytes.toBytes("B"));
-      }
-
-      // Delete one region from meta, but not hdfs, unassign it.
-      deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
-        Bytes.toBytes("C"), true, true, false);
-
-      // Create a new meta entry to fake it as a split parent.
-      meta = connection.getTable(TableName.META_TABLE_NAME, tableExecutorService);
-      HRegionInfo hri = location.getRegionInfo();
-
-      HRegionInfo a = new HRegionInfo(tbl.getName(),
-        Bytes.toBytes("B"), Bytes.toBytes("BM"));
-      HRegionInfo b = new HRegionInfo(tbl.getName(),
-        Bytes.toBytes("BM"), Bytes.toBytes("C"));
-
-      hri.setOffline(true);
-      hri.setSplit(true);
-
-      MetaTableAccessor.addRegionToMeta(meta, hri, a, b);
-      meta.close();
-      admin.flush(TableName.META_TABLE_NAME);
-
-      HBaseFsck hbck = doFsck(conf, false);
-      assertErrors(hbck, new ERROR_CODE[] {
-        ERROR_CODE.LINGERING_SPLIT_PARENT, ERROR_CODE.HOLE_IN_REGION_CHAIN});
-
-      // regular repair cannot fix lingering split parent
-      hbck = doFsck(conf, true);
-      assertErrors(hbck, new ERROR_CODE[] {
-        ERROR_CODE.LINGERING_SPLIT_PARENT, ERROR_CODE.HOLE_IN_REGION_CHAIN });
-      assertFalse(hbck.shouldRerun());
-      hbck = doFsck(conf, false);
-      assertErrors(hbck, new ERROR_CODE[] {
-        ERROR_CODE.LINGERING_SPLIT_PARENT, ERROR_CODE.HOLE_IN_REGION_CHAIN});
-
-      // fix lingering split parent
-      hbck = new HBaseFsck(conf, hbfsckExecutorService);
-      hbck.connect();
-      HBaseFsck.setDisplayFullReport(); // i.e. -details
-      hbck.setTimeLag(0);
-      hbck.setFixSplitParents(true);
-      hbck.onlineHbck();
-      assertTrue(hbck.shouldRerun());
-      hbck.close();
-
-      Get get = new Get(hri.getRegionName());
-      Result result = meta.get(get);
-      assertTrue(result.getColumnCells(HConstants.CATALOG_FAMILY,
-        HConstants.SPLITA_QUALIFIER).isEmpty());
-      assertTrue(result.getColumnCells(HConstants.CATALOG_FAMILY,
-        HConstants.SPLITB_QUALIFIER).isEmpty());
-      admin.flush(TableName.META_TABLE_NAME);
-
-      // fix other issues
-      doFsck(conf, true);
-
-      // check that all are fixed
-      assertNoErrors(doFsck(conf, false));
-      assertEquals(ROWKEYS.length, countRows());
-    } finally {
-      cleanupTable(table);
-      IOUtils.closeQuietly(meta);
-    }
-  }
-
-  /**
-   * Tests that LINGERING_SPLIT_PARENT is not erroneously reported for
-   * valid cases where the daughters are there.
-   */
-  @Test (timeout=180000)
-  public void testValidLingeringSplitParent() throws Exception {
-    TableName table =
-        TableName.valueOf("testLingeringSplitParent");
-    Table meta = null;
-    try {
-      setupTable(table);
-      assertEquals(ROWKEYS.length, countRows());
-
-      // make sure data in regions, if in wal only there is no data loss
-      admin.flush(table);
-
-      try(RegionLocator rl = connection.getRegionLocator(tbl.getName())) {
-        HRegionLocation location = rl.getRegionLocation(Bytes.toBytes("B"));
-
-        meta = connection.getTable(TableName.META_TABLE_NAME, tableExecutorService);
-        HRegionInfo hri = location.getRegionInfo();
-
-        // do a regular split
-        byte[] regionName = location.getRegionInfo().getRegionName();
-        admin.splitRegion(location.getRegionInfo().getRegionName(), Bytes.toBytes("BM"));
-        TestEndToEndSplitTransaction.blockUntilRegionSplit(conf, 60000, regionName, true);
-
-        // TODO: fixHdfsHoles does not work against splits, since the parent dir lingers on
-        // for some time until children references are deleted. HBCK erroneously sees this as
-        // overlapping regions
-        HBaseFsck hbck = doFsck(conf, true, true, false, false, false, true, true, true, false,
-            false, null);
-        assertErrors(hbck, new ERROR_CODE[] {}); //no LINGERING_SPLIT_PARENT reported
-
-        // assert that the split hbase:meta entry is still there.
-        Get get = new Get(hri.getRegionName());
-        Result result = meta.get(get);
-        assertNotNull(result);
-        assertNotNull(MetaTableAccessor.getHRegionInfo(result));
-
-        assertEquals(ROWKEYS.length, countRows());
-
-        // assert that we still have the split regions
-        assertEquals(rl.getStartKeys().length, SPLITS.length + 1 + 1); //SPLITS + 1 is # regions
-        // pre-split.
-        assertNoErrors(doFsck(conf, false));
-      }
-    } finally {
-      cleanupTable(table);
-      IOUtils.closeQuietly(meta);
-    }
-  }
-
-  /**
-   * Split crashed after write to hbase:meta finished for the parent region, but
-   * failed to write daughters (pre HBASE-7721 codebase)
-   */
-  @Test(timeout=75000)
-  public void testSplitDaughtersNotInMeta() throws Exception {
-    TableName table = TableName.valueOf("testSplitdaughtersNotInMeta");
-    Table meta = connection.getTable(TableName.META_TABLE_NAME, tableExecutorService);
-    try {
-      setupTable(table);
-      assertEquals(ROWKEYS.length, countRows());
-
-      // make sure data in regions, if in wal only there is no data loss
-      admin.flush(table);
-
-      try(RegionLocator rl = connection.getRegionLocator(tbl.getName())) {
-        HRegionLocation location = rl.getRegionLocation(Bytes.toBytes("B"));
-
-        HRegionInfo hri = location.getRegionInfo();
-
-        // do a regular split
-        byte[] regionName = location.getRegionInfo().getRegionName();
-        admin.splitRegion(location.getRegionInfo().getRegionName(), Bytes.toBytes("BM"));
-        TestEndToEndSplitTransaction.blockUntilRegionSplit(conf, 60000, regionName, true);
-
-        PairOfSameType<HRegionInfo> daughters = MetaTableAccessor.getDaughterRegions(
-            meta.get(new Get(regionName)));
-
-        // Delete daughter regions from meta, but not hdfs, unassign it.
-
-        ServerName firstSN =
-            rl.getRegionLocation(daughters.getFirst().getStartKey()).getServerName();
-        ServerName secondSN =
-            rl.getRegionLocation(daughters.getSecond().getStartKey()).getServerName();
-
-        undeployRegion(connection, firstSN, daughters.getFirst());
-        undeployRegion(connection, secondSN, daughters.getSecond());
-
-        List<Delete> deletes = new ArrayList<>();
-        deletes.add(new Delete(daughters.getFirst().getRegionName()));
-        deletes.add(new Delete(daughters.getSecond().getRegionName()));
-        meta.delete(deletes);
-
-        // Remove daughters from regionStates
-        RegionStates regionStates = TEST_UTIL.getMiniHBaseCluster().getMaster().
-            getAssignmentManager().getRegionStates();
-        regionStates.deleteRegion(daughters.getFirst());
-        regionStates.deleteRegion(daughters.getSecond());
-
-        HBaseFsck hbck = doFsck(conf, false);
-        assertErrors(hbck, new ERROR_CODE[] {
-            ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
-            ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
-            ERROR_CODE.HOLE_IN_REGION_CHAIN }); //no LINGERING_SPLIT_PARENT
-
-        // now fix it. The fix should not revert the region split, but add daughters to META
-        hbck = doFsck(conf, true, true, false, false, false, false, false, false, false,
-            false, null);
-        assertErrors(hbck, new ERROR_CODE[] {
-            ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
-            ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
-            ERROR_CODE.HOLE_IN_REGION_CHAIN });
-
-        // assert that the split hbase:meta entry is still there.
-        Get get = new Get(hri.getRegionName());
-        Result result = meta.get(get);
-        assertNotNull(result);
-        assertNotNull(MetaTableAccessor.getHRegionInfo(result));
-
-        assertEquals(ROWKEYS.length, countRows());
-
-        // assert that we still have the split regions
-        assertEquals(rl.getStartKeys().length, SPLITS.length + 1 + 1); //SPLITS + 1 is # regions
-        // pre-split.
-        assertNoErrors(doFsck(conf, false)); //should be fixed by now
-      }
-    } finally {
-      meta.close();
-      cleanupTable(table);
-    }
-  }
-
-  /**
-   * This creates and fixes a bad table with a missing region which is the 1st region -- hole in
-   * meta and data missing in the fs.
-   */
-  @Test(timeout=120000)
-  public void testMissingFirstRegion() throws Exception {
-    TableName table = TableName.valueOf("testMissingFirstRegion");
-    try {
-      setupTable(table);
-      assertEquals(ROWKEYS.length, countRows());
-
-      // Mess it up by leaving a hole in the assignment, meta, and hdfs data
-      admin.disableTable(table);
-      deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes(""), Bytes.toBytes("A"), true,
-          true, true);
-      admin.enableTable(table);
-
-      HBaseFsck hbck = doFsck(conf, false);
-      assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.FIRST_REGION_STARTKEY_NOT_EMPTY });
-      // fix hole
-      doFsck(conf, true);
-      // check that hole fixed
-      assertNoErrors(doFsck(conf, false));
-    } finally {
-      cleanupTable(table);
-    }
-  }
-
-  /**
-   * This creates and fixes a bad table with a region that is still in meta and deployed
-   * but has no data in the fs.
-   */
-  @Test(timeout=120000)
-  public void testRegionDeployedNotInHdfs() throws Exception {
-    TableName table =
-        TableName.valueOf("testSingleRegionDeployedNotInHdfs");
-    try {
-      setupTable(table);
-      admin.flush(table);
-
-      // Mess it up by deleting region dir
-      deleteRegion(conf, tbl.getTableDescriptor(),
-        HConstants.EMPTY_START_ROW, Bytes.toBytes("A"), false,
-        false, true);
-
-      HBaseFsck hbck = doFsck(conf, false);
-      assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NOT_IN_HDFS });
-      // fix hole
-      doFsck(conf, true);
-      // check that hole fixed
-      assertNoErrors(doFsck(conf, false));
-    } finally {
-      cleanupTable(table);
-    }
-  }
-
-  /**
-   * This creates and fixes a bad table with a missing last region -- hole in meta and data missing in
-   * the fs.
-   */
-  @Test(timeout=120000)
-  public void testMissingLastRegion() throws Exception {
-    TableName table =
-        TableName.valueOf("testMissingLastRegion");
-    try {
-      setupTable(table);
-      assertEquals(ROWKEYS.length, countRows());
-
-      // Mess it up by leaving a hole in the assignment, meta, and hdfs data
-      admin.disableTable(table);
-      deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("C"), Bytes.toBytes(""), true,
-          true, true);
-      admin.enableTable(table);
-
-      HBaseFsck hbck = doFsck(conf, false);
-      assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.LAST_REGION_ENDKEY_NOT_EMPTY });
-      // fix hole
-      doFsck(conf, true);
-      // check that hole fixed
-      assertNoErrors(doFsck(conf, false));
-    } finally {
-      cleanupTable(table);
-    }
-  }
-
-  /**
-   * Test that the -noHdfsChecking option can detect and fix assignment issues.
-   */
-  @Test (timeout=180000)
-  public void testFixAssignmentsAndNoHdfsChecking() throws Exception {
-    TableName table =
-        TableName.valueOf("testFixAssignmentsAndNoHdfsChecking");
-    try {
-      setupTable(table);
-      assertEquals(ROWKEYS.length, countRows());
-
-      // Mess it up by closing a region
-      deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"), Bytes.toBytes("B"), true,
-          false, false, false, HRegionInfo.DEFAULT_REPLICA_ID);
-
-      // verify there are no other errors
-      HBaseFsck hbck = doFsck(conf, false);
-      assertErrors(hbck,
-          new ERROR_CODE[] { ERROR_CODE.NOT_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN });
-
-      // verify that noHdfsChecking reports the same errors
-      HBaseFsck fsck = new HBaseFsck(conf, hbfsckExecutorService);
-      fsck.connect();
-      HBaseFsck.setDisplayFullReport(); // i.e. -details
-      fsck.setTimeLag(0);
-      fsck.setCheckHdfs(false);
-      fsck.onlineHbck();
-      assertErrors(fsck,
-          new ERROR_CODE[] { ERROR_CODE.NOT_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN });
-      fsck.close();
-
-      // verify that fixAssignments works fine with noHdfsChecking
-      fsck = new HBaseFsck(conf, hbfsckExecutorService);
-      fsck.connect();
-      HBaseFsck.setDisplayFullReport(); // i.e. -details
-      fsck.setTimeLag(0);
-      fsck.setCheckHdfs(false);
-      fsck.setFixAssignments(true);
-      fsck.onlineHbck();
-      assertTrue(fsck.shouldRerun());
-      fsck.onlineHbck();
-      assertNoErrors(fsck);
-
-      assertEquals(ROWKEYS.length, countRows());
-
-      fsck.close();
-    } finally {
-      cleanupTable(table);
-    }
-  }
-
-  /**
-   * Test that the -noHdfsChecking option can detect a region that is not in meta but is deployed.
-   * However, it cannot fix the problem without checking hdfs, because the region info
-   * must be read from hdfs in this case in order to patch the meta.
-   */
-  @Test (timeout=180000)
-  public void testFixMetaNotWorkingWithNoHdfsChecking() throws Exception {
-    TableName table =
-        TableName.valueOf("testFixMetaNotWorkingWithNoHdfsChecking");
-    try {
-      setupTable(table);
-      assertEquals(ROWKEYS.length, countRows());
-
-      // Mess it up by deleting a region from the metadata
-      deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"),
-        Bytes.toBytes("B"), false, true, false, false, HRegionInfo.DEFAULT_REPLICA_ID);
-
-      // verify there are no other errors
-      HBaseFsck hbck = doFsck(conf, false);
-      assertErrors(hbck,
-          new ERROR_CODE[] { ERROR_CODE.NOT_IN_META, ERROR_CODE.HOLE_IN_REGION_CHAIN });
-
-      // verify that noHdfsChecking reports the same errors
-      HBaseFsck fsck = new HBaseFsck(conf, hbfsckExecutorService);
-      fsck.connect();
-      HBaseFsck.setDisplayFullReport(); // i.e. -details
-      fsck.setTimeLag(0);
-      fsck.setCheckHdfs(false);
-      fsck.onlineHbck();
-      assertErrors(fsck,
-          new ERROR_CODE[] { ERROR_CODE.NOT_IN_META, ERROR_CODE.HOLE_IN_REGION_CHAIN });
-      fsck.close();
-
-      // verify that fixMeta doesn't work with noHdfsChecking
-      fsck = new HBaseFsck(conf, hbfsckExecutorService);
-      fsck.connect();
-      HBaseFsck.setDisplayFullReport(); // i.e. -details
-      fsck.setTimeLag(0);
-      fsck.setCheckHdfs(false);
-      fsck.setFixAssignments(true);
-      fsck.setFixMeta(true);
-      fsck.onlineHbck();
-      assertFalse(fsck.shouldRerun());
-      assertErrors(fsck,
-          new ERROR_CODE[] { ERROR_CODE.NOT_IN_META, ERROR_CODE.HOLE_IN_REGION_CHAIN });
-      fsck.close();
-
-      // fix the cluster so other tests won't be impacted
-      fsck = doFsck(conf, true);
-      assertTrue(fsck.shouldRerun());
-      fsck = doFsck(conf, true);
-      assertNoErrors(fsck);
-    } finally {
-      cleanupTable(table);
-    }
-  }
-
-  /**
-   * Test that -fixHdfsHoles does not work with the -noHdfsChecking option,
-   * and that -noHdfsChecking cannot detect an orphan hdfs region.
-   */
-  @Test (timeout=180000)
-  public void testFixHdfsHolesNotWorkingWithNoHdfsChecking() throws Exception {
-    TableName table =
-        TableName.valueOf("testFixHdfsHolesNotWorkingWithNoHdfsChecking");
-    try {
-      setupTable(table);
-      assertEquals(ROWKEYS.length, countRows());
-
-      // Mess it up by creating an overlap in the metadata
-      admin.disableTable(table);
-      deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"), Bytes.toBytes("B"), true,
-          true, false, true, HRegionInfo.DEFAULT_REPLICA_ID);
-      admin.enableTable(table);
-
-      HRegionInfo hriOverlap =
-          createRegion(tbl.getTableDescriptor(), Bytes.toBytes("A2"), Bytes.toBytes("B"));
-      TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriOverlap);
-      TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
-        .waitForAssignment(hriOverlap);
-      ServerName server = regionStates.getRegionServerOfRegion(hriOverlap);
-      TEST_UTIL.assertRegionOnServer(hriOverlap, server, REGION_ONLINE_TIMEOUT);
-
-      HBaseFsck hbck = doFsck(conf, false);
-      assertErrors(hbck, new ERROR_CODE[] {
-        ERROR_CODE.ORPHAN_HDFS_REGION, ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
-        ERROR_CODE.HOLE_IN_REGION_CHAIN});
-
-      // verify that noHdfsChecking can't detect ORPHAN_HDFS_REGION
-      HBaseFsck fsck = new HBaseFsck(conf, hbfsckExecutorService);
-      fsck.connect();
-      HBaseFsck.setDisplayFullReport(); // i.e. -details
-      fsck.setTimeLag(0);
-      fsck.setCheckHdfs(false);
-      fsck.onlineHbck();
-      assertErrors(fsck, new ERROR_CODE[] { ERROR_CODE.HOLE_IN_REGION_CHAIN });
-      fsck.close();
-
-      // verify that fixHdfsHoles doesn't work with noHdfsChecking
-      fsck = new HBaseFsck(conf, hbfsckExecutorService);
-      fsck.connect();
-      HBaseFsck.setDisplayFullReport(); // i.e. -details
-      fsck.setTimeLag(0);
-      fsck.setCheckHdfs(false);
-      fsck.setFixHdfsHoles(true);
-      fsck.setFixHdfsOverlaps(true);
-      fsck.setFixHdfsOrphans(true);
-      fsck.onlineHbck();
-      assertFalse(fsck.shouldRerun());
-      assertErrors(fsck, new ERROR_CODE[] { ERROR_CODE.HOLE_IN_REGION_CHAIN });
-      fsck.close();
-    } finally {
-      if (admin.isTableDisabled(table)) {
-        admin.enableTable(table);
-      }
-      cleanupTable(table);
-    }
-  }
-
-  /**
-   * We don't have an easy way to verify that a flush completed, so we loop until we find a
-   * legitimate hfile and return it.
-   * @param fs the filesystem holding the table's region directories
-   * @param table the table whose family directory is scanned for hfiles
-   * @return Path of a flushed hfile.
-   * @throws IOException
-   */
-  Path getFlushedHFile(FileSystem fs, TableName table) throws IOException {
-    Path tableDir= FSUtils.getTableDir(FSUtils.getRootDir(conf), table);
-    Path regionDir = FSUtils.getRegionDirs(fs, tableDir).get(0);
-    Path famDir = new Path(regionDir, FAM_STR);
-
-    // keep doing this until we get a legit hfile
-    while (true) {
-      FileStatus[] hfFss = fs.listStatus(famDir);
-      if (hfFss.length == 0) {
-        continue;
-      }
-      for (FileStatus hfs : hfFss) {
-        if (!hfs.isDirectory()) {
-          return hfs.getPath();
-        }
-      }
-    }
-  }
-
-  /**
-   * Gets a flushed mob file.
-   * @param fs The current file system.
-   * @param table The current table name.
-   * @return Path of a flushed mob file.
-   * @throws IOException
-   */
-  Path getFlushedMobFile(FileSystem fs, TableName table) throws IOException {
-    Path regionDir = MobUtils.getMobRegionPath(conf, table);
-    Path famDir = new Path(regionDir, FAM_STR);
-
-    // keep doing this until we get a legit hfile
-    while (true) {
-      FileStatus[] hfFss = fs.listStatus(famDir);
-      if (hfFss.length == 0) {
-        continue;
-      }
-      for (FileStatus hfs : hfFss) {
-        if (!hfs.isDirectory()) {
-          return hfs.getPath();
-        }
-      }
-    }
-  }
-
-  /**
-   * Creates a new mob file name from the old one.
-   * @param oldFileName The old mob file name.
-   * @return The new mob file name.
-   */
-  String createMobFileName(String oldFileName) {
-    MobFileName mobFileName = MobFileName.create(oldFileName);
-    String startKey = mobFileName.getStartKey();
-    String date = mobFileName.getDate();
-    return MobFileName.create(startKey, date, UUID.randomUUID().toString().replaceAll("-", ""))
-      .getFileName();
-  }
-
-  /**
-   * This creates a table and then corrupts an hfile.  Hbck should quarantine the file.
-   */
-  @Test(timeout=180000)
-  public void testQuarantineCorruptHFile() throws Exception {
-    TableName table = TableName.valueOf(name.getMethodName());
-    try {
-      setupTable(table);
-      assertEquals(ROWKEYS.length, countRows());
-      admin.flush(table); // flush is async.
-
-      FileSystem fs = FileSystem.get(conf);
-      Path hfile = getFlushedHFile(fs, table);
-
-      // Mess it up by leaving a hole in the assignment, meta, and hdfs data
-      admin.disableTable(table);
-
-      // create new corrupt file called deadbeef (valid hfile name)
-      Path corrupt = new Path(hfile.getParent(), "deadbeef");
-      TestHFile.truncateFile(fs, hfile, corrupt);
-      LOG.info("Created corrupted file " + corrupt);
-      HBaseFsck.debugLsr(conf, FSUtils.getRootDir(conf));
-
-      // we cannot enable here because enable never finishes due to the corrupt region.
-      HBaseFsck res = HbckTestingUtil.doHFileQuarantine(conf, table);
-      assertEquals(res.getRetCode(), 0);
-      HFileCorruptionChecker hfcc = res.getHFilecorruptionChecker();
-      assertEquals(hfcc.getHFilesChecked(), 5);
-      assertEquals(hfcc.getCorrupted().size(), 1);
-      assertEquals(hfcc.getFailures().size(), 0);
-      assertEquals(hfcc.getQuarantined().size(), 1);
-      assertEquals(hfcc.getMissing().size(), 0);
-
-      // It's been fixed; verify that we can enable.
-      admin.enableTable(table);
-    } finally {
-      cleanupTable(table);
-    }
-  }
-
-  /**
-   * This creates a table and then corrupts a mob file.  Hbck should quarantine the file.
-   */
-  @Test(timeout=180000)
-  public void testQuarantineCorruptMobFile() throws Exception {
-    TableName table = TableName.valueOf(name.getMethodName());
-    try {
-      setupMobTable(table);
-      assertEquals(ROWKEYS.length, countRows());
-      admin.flush(table);
-
-      FileSystem fs = FileSystem.get(conf);
-      Path mobFile = getFlushedMobFile(fs, table);
-      admin.disableTable(table);
-      // create new corrupt mob file.
-      String corruptMobFile = createMobFileName(mobFile.getName());
-      Path corrupt = new Path(mobFile.getParent(), corruptMobFile);
-      TestHFile.truncateFile(fs, mobFile, corrupt);
-      LOG.info("Created corrupted mob file " + corrupt);
-      HBaseFsck.debugLsr(conf, FSUtils.getRootDir(conf));
-      HBaseFsck.debugLsr(conf, MobUtils.getMobHome(conf));
-
-      // A corrupt mob file doesn't abort the start of regions, so we can enable the table.
-      admin.enableTable(table);
-      HBaseFsck res = HbckTestingUtil.doHFileQuarantine(conf, table);
-      assertEquals(res.getRetCode(), 0);
-      HFileCorruptionChecker hfcc = res.getHFilecorruptionChecker();
-      assertEquals(hfcc.getHFilesChecked(), 4);
-      assertEquals(hfcc.getCorrupted().size(), 0);
-      assertEquals(hfcc.getFailures().size(), 0);
-      assertEquals(hfcc.getQuarantined().size(), 0);
-      assertEquals(hfcc.getMissing().size(), 0);
-      assertEquals(hfcc.getMobFilesChecked(), 5);
-      assertEquals(hfcc.getCorruptedMobFiles().size(), 1);
-      assertEquals(hfcc.getFailureMobFiles().size(), 0);
-      assertEquals(hfcc.getQuarantinedMobFiles().size(), 1);
-      assertEquals(hfcc.getMissedMobFiles().size(), 0);
-      String quarantinedMobFile = hfcc.getQuarantinedMobFiles().iterator().next().getName();
-      assertEquals(corruptMobFile, quarantinedMobFile);
-    } finally {
-      cleanupTable(table);
-    }
-  }
-
-  /**
-   * Tests that use this should have a timeout, because this method could potentially wait forever.
-   */
-  private void doQuarantineTest

<TRUNCATED>

[4/4] hbase git commit: HBASE-14570 Split TestHBaseFsck in order to help with hanging tests

Posted by ec...@apache.org.
HBASE-14570 Split TestHBaseFsck in order to help with hanging tests


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/fbd2ed2e
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/fbd2ed2e
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/fbd2ed2e

Branch: refs/heads/master
Commit: fbd2ed2e02d3b6c7287331b345d328b9518ec6ba
Parents: fe0bdbe
Author: Elliott Clark <ec...@apache.org>
Authored: Tue Oct 13 09:37:03 2015 -0700
Committer: Elliott Clark <ec...@apache.org>
Committed: Wed Oct 14 16:57:41 2015 -0400

----------------------------------------------------------------------
 .../hadoop/hbase/util/BaseTestHBaseFsck.java    |  984 ++++++
 .../apache/hadoop/hbase/util/TestHBaseFsck.java | 2914 ------------------
 .../hadoop/hbase/util/TestHBaseFsckMOB.java     |  140 +
 .../hadoop/hbase/util/TestHBaseFsckOneRS.java   | 1477 +++++++++
 .../hbase/util/TestHBaseFsckReplicas.java       |  257 ++
 .../hadoop/hbase/util/TestHBaseFsckTwoRS.java   |  464 +++
 6 files changed, 3322 insertions(+), 2914 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/fbd2ed2e/hbase-server/src/test/java/org/apache/hadoop/hbase/util/BaseTestHBaseFsck.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/BaseTestHBaseFsck.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/BaseTestHBaseFsck.java
new file mode 100644
index 0000000..cc29732
--- /dev/null
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/BaseTestHBaseFsck.java
@@ -0,0 +1,984 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.util;
+
+import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.assertErrors;
+import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.assertNoErrors;
+import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.doFsck;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.NavigableMap;
+import java.util.Set;
+import java.util.UUID;
+import java.util.concurrent.Callable;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+import java.util.concurrent.ScheduledThreadPoolExecutor;
+import java.util.concurrent.SynchronousQueue;
+import java.util.concurrent.ThreadPoolExecutor;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicBoolean;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.ClusterStatus;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HColumnDescriptor;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.HRegionLocation;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.MiniHBaseCluster;
+import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.MetaTableAccessor;
+import org.apache.hadoop.hbase.client.Admin;
+import org.apache.hadoop.hbase.client.ClusterConnection;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.client.Delete;
+import org.apache.hadoop.hbase.client.Durability;
+import org.apache.hadoop.hbase.client.Get;
+import org.apache.hadoop.hbase.client.HBaseAdmin;
+import org.apache.hadoop.hbase.client.HConnection;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.RegionLocator;
+import org.apache.hadoop.hbase.client.RegionReplicaUtil;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.ResultScanner;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.coprocessor.BaseMasterObserver;
+import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
+import org.apache.hadoop.hbase.coprocessor.MasterCoprocessorEnvironment;
+import org.apache.hadoop.hbase.coprocessor.ObserverContext;
+import org.apache.hadoop.hbase.io.hfile.TestHFile;
+import org.apache.hadoop.hbase.master.AssignmentManager;
+import org.apache.hadoop.hbase.master.HMaster;
+import org.apache.hadoop.hbase.master.RegionState;
+import org.apache.hadoop.hbase.master.RegionStates;
+import org.apache.hadoop.hbase.master.TableLockManager;
+import org.apache.hadoop.hbase.master.TableLockManager.TableLock;
+import org.apache.hadoop.hbase.mob.MobFileName;
+import org.apache.hadoop.hbase.mob.MobUtils;
+import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
+import org.apache.hadoop.hbase.protobuf.generated.AdminProtos;
+import org.apache.hadoop.hbase.regionserver.HRegion;
+import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
+import org.apache.hadoop.hbase.regionserver.HRegionServer;
+import org.apache.hadoop.hbase.regionserver.SplitTransactionFactory;
+import org.apache.hadoop.hbase.regionserver.SplitTransactionImpl;
+import org.apache.hadoop.hbase.regionserver.TestEndToEndSplitTransaction;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.testclassification.MiscTests;
+import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter;
+import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter.ERROR_CODE;
+import org.apache.hadoop.hbase.util.HBaseFsck.HbckInfo;
+import org.apache.hadoop.hbase.util.HBaseFsck.PrintingErrorReporter;
+import org.apache.hadoop.hbase.util.HBaseFsck.TableInfo;
+import org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker;
+import org.apache.hadoop.hbase.util.hbck.HbckTestingUtil;
+import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
+import org.apache.zookeeper.KeeperException;
+import org.junit.AfterClass;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Ignore;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.rules.TestName;
+
+import com.google.common.collect.Multimap;
+
+/**
+ * This is the base class for tests of HBaseFsck's ability to detect reasons for inconsistent tables.
+ *
+ * Actual tests are in:
+ * TestHBaseFsckTwoRS
+ * TestHBaseFsckOneRS
+ * TestHBaseFsckMOB
+ * TestHBaseFsckReplicas
+ */
+public class BaseTestHBaseFsck {
+  static final int POOL_SIZE = 7;
+  protected static final Log LOG = LogFactory.getLog(BaseTestHBaseFsck.class);
+  protected final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
+  protected final static Configuration conf = TEST_UTIL.getConfiguration();
+  protected final static String FAM_STR = "fam";
+  protected final static byte[] FAM = Bytes.toBytes(FAM_STR);
+  protected final static int REGION_ONLINE_TIMEOUT = 800;
+  protected static RegionStates regionStates;
+  protected static ExecutorService tableExecutorService;
+  protected static ScheduledThreadPoolExecutor hbfsckExecutorService;
+  protected static ClusterConnection connection;
+  protected static Admin admin;
+
+  // for the instance, reset every test run
+  protected Table tbl;
+  protected final static byte[][] SPLITS = new byte[][] { Bytes.toBytes("A"),
+    Bytes.toBytes("B"), Bytes.toBytes("C") };
+  // one row per region.
+  protected final static byte[][] ROWKEYS= new byte[][] {
+    Bytes.toBytes("00"), Bytes.toBytes("50"), Bytes.toBytes("A0"), Bytes.toBytes("A5"),
+    Bytes.toBytes("B0"), Bytes.toBytes("B5"), Bytes.toBytes("C0"), Bytes.toBytes("C5") };
+
+  
+  /**
+   * Create a new region in META.
+   */
+  protected HRegionInfo createRegion(final HTableDescriptor
+      htd, byte[] startKey, byte[] endKey)
+      throws IOException {
+    Table meta = connection.getTable(TableName.META_TABLE_NAME, tableExecutorService);
+    HRegionInfo hri = new HRegionInfo(htd.getTableName(), startKey, endKey);
+    MetaTableAccessor.addRegionToMeta(meta, hri);
+    meta.close();
+    return hri;
+  }
+
+  /**
+   * Debugging method to dump the contents of meta.
+   */
+  protected void dumpMeta(TableName tableName) throws IOException {
+    List<byte[]> metaRows = TEST_UTIL.getMetaTableRows(tableName);
+    for (byte[] row : metaRows) {
+      LOG.info(Bytes.toString(row));
+    }
+  }
+
+  /**
+   * This method is used to undeploy a region -- close it and attempt to
+   * remove its state from the Master.
+   */
+  protected void undeployRegion(Connection conn, ServerName sn,
+      HRegionInfo hri) throws IOException, InterruptedException {
+    try {
+      HBaseFsckRepair.closeRegionSilentlyAndWait((HConnection) conn, sn, hri);
+      if (!hri.isMetaTable()) {
+        admin.offline(hri.getRegionName());
+      }
+    } catch (IOException ioe) {
+      LOG.warn("Got exception when attempting to offline region "
+          + Bytes.toString(hri.getRegionName()), ioe);
+    }
+  }
+  /**
+   * Delete a region from assignments, meta, or completely from hdfs.
+   * @param unassign if true unassign region if assigned
+   * @param metaRow  if true remove region's row from META
+   * @param hdfs if true remove region's dir in HDFS
+   */
+  protected void deleteRegion(Configuration conf, final HTableDescriptor htd,
+      byte[] startKey, byte[] endKey, boolean unassign, boolean metaRow,
+      boolean hdfs) throws IOException, InterruptedException {
+    deleteRegion(conf, htd, startKey, endKey, unassign, metaRow, hdfs, false,
+        HRegionInfo.DEFAULT_REPLICA_ID);
+  }
+
+  /**
+   * Delete a region from assignments, meta, or completely from hdfs.
+   * @param unassign if true unassign region if assigned
+   * @param metaRow  if true remove region's row from META
+   * @param hdfs if true remove region's dir in HDFS
+   * @param regionInfoOnly if true remove a region dir's .regioninfo file
+   * @param replicaId replica id
+   */
+  protected void deleteRegion(Configuration conf, final HTableDescriptor htd,
+      byte[] startKey, byte[] endKey, boolean unassign, boolean metaRow,
+      boolean hdfs, boolean regionInfoOnly, int replicaId)
+          throws IOException, InterruptedException {
+    LOG.info("** Before delete:");
+    dumpMeta(htd.getTableName());
+
+    List<HRegionLocation> locations;
+    try(RegionLocator rl = connection.getRegionLocator(tbl.getName())) {
+      locations = rl.getAllRegionLocations();
+    }
+
+    for (HRegionLocation location : locations) {
+      HRegionInfo hri = location.getRegionInfo();
+      ServerName hsa = location.getServerName();
+      if (Bytes.compareTo(hri.getStartKey(), startKey) == 0
+          && Bytes.compareTo(hri.getEndKey(), endKey) == 0
+          && hri.getReplicaId() == replicaId) {
+
+        LOG.info("RegionName: " +hri.getRegionNameAsString());
+        byte[] deleteRow = hri.getRegionName();
+
+        if (unassign) {
+          LOG.info("Undeploying region " + hri + " from server " + hsa);
+          undeployRegion(connection, hsa, hri);
+        }
+
+        if (regionInfoOnly) {
+          LOG.info("deleting hdfs .regioninfo data: " + hri.toString() + hsa.toString());
+          Path rootDir = FSUtils.getRootDir(conf);
+          FileSystem fs = rootDir.getFileSystem(conf);
+          Path p = new Path(FSUtils.getTableDir(rootDir, htd.getTableName()),
+              hri.getEncodedName());
+          Path hriPath = new Path(p, HRegionFileSystem.REGION_INFO_FILE);
+          fs.delete(hriPath, true);
+        }
+
+        if (hdfs) {
+          LOG.info("deleting hdfs data: " + hri.toString() + hsa.toString());
+          Path rootDir = FSUtils.getRootDir(conf);
+          FileSystem fs = rootDir.getFileSystem(conf);
+          Path p = new Path(FSUtils.getTableDir(rootDir, htd.getTableName()),
+              hri.getEncodedName());
+          HBaseFsck.debugLsr(conf, p);
+          boolean success = fs.delete(p, true);
+          LOG.info("Deleted " + p + " sucessfully? " + success);
+          HBaseFsck.debugLsr(conf, p);
+        }
+
+        if (metaRow) {
+          try (Table meta = connection.getTable(TableName.META_TABLE_NAME, tableExecutorService)) {
+            Delete delete = new Delete(deleteRow);
+            meta.delete(delete);
+          }
+        }
+      }
+      LOG.info(hri.toString() + hsa.toString());
+    }
+
+    TEST_UTIL.getMetaTableRows(htd.getTableName());
+    LOG.info("*** After delete:");
+    dumpMeta(htd.getTableName());
+  }
+
+  /**
+   * Setup a clean table before we start mucking with it.
+   *
+   * It will set tbl, which needs to be closed after the test
+   *
+   * @throws IOException
+   * @throws InterruptedException
+   * @throws KeeperException
+   */
+  void setupTable(TableName tablename) throws Exception {
+    setupTableWithRegionReplica(tablename, 1);
+  }
+
+  /**
+   * Setup a clean table with a certain region replica count
+   *
+   * It will set tbl, which needs to be closed after the test
+   *
+   * @param tablename
+   * @param replicaCount
+   * @throws Exception
+   */
+  void setupTableWithRegionReplica(TableName tablename, int replicaCount) throws Exception {
+    HTableDescriptor desc = new HTableDescriptor(tablename);
+    desc.setRegionReplication(replicaCount);
+    HColumnDescriptor hcd = new HColumnDescriptor(Bytes.toString(FAM));
+    desc.addFamily(hcd); // If a table has no CF's it doesn't get checked
+    createTable(TEST_UTIL, desc, SPLITS);
+
+    tbl = connection.getTable(tablename, tableExecutorService);
+    List<Put> puts = new ArrayList<Put>();
+    for (byte[] row : ROWKEYS) {
+      Put p = new Put(row);
+      p.addColumn(FAM, Bytes.toBytes("val"), row);
+      puts.add(p);
+    }
+    tbl.put(puts);
+  }
+
+  /**
+   * Setup a clean table with a mob-enabled column.
+   *
+   * @param tablename The name of the table to be created.
+   * @throws Exception
+   */
+  void setupMobTable(TableName tablename) throws Exception {
+    HTableDescriptor desc = new HTableDescriptor(tablename);
+    HColumnDescriptor hcd = new HColumnDescriptor(Bytes.toString(FAM));
+    hcd.setMobEnabled(true);
+    hcd.setMobThreshold(0);
+    desc.addFamily(hcd); // If a table has no CF's it doesn't get checked
+    createTable(TEST_UTIL, desc, SPLITS);
+
+    tbl = connection.getTable(tablename, tableExecutorService);
+    List<Put> puts = new ArrayList<Put>();
+    for (byte[] row : ROWKEYS) {
+      Put p = new Put(row);
+      p.add(FAM, Bytes.toBytes("val"), row);
+      puts.add(p);
+    }
+    tbl.put(puts);
+  }
+
+  /**
+   * Counts the number of rows to verify data loss or non-dataloss.
+   */
+  int countRows() throws IOException {
+     Scan s = new Scan();
+     ResultScanner rs = tbl.getScanner(s);
+     int i = 0;
+     while(rs.next() !=null) {
+       i++;
+     }
+     return i;
+  }
+
+  /**
+   * Delete the table in preparation for the next test.
+   *
+   * @param tablename
+   * @throws IOException
+   */
+  void cleanupTable(TableName tablename) throws Exception {
+    if (tbl != null) {
+      tbl.close();
+      tbl = null;
+    }
+
+    ((ClusterConnection) connection).clearRegionCache();
+    deleteTable(TEST_UTIL, tablename);
+  }
+
+  /**
+   * Get region info from local cluster.
+   */
+  Map<ServerName, List<String>> getDeployedHRIs(final HBaseAdmin admin) throws IOException {
+    ClusterStatus status = admin.getClusterStatus();
+    Collection<ServerName> regionServers = status.getServers();
+    Map<ServerName, List<String>> mm =
+        new HashMap<ServerName, List<String>>();
+    for (ServerName hsi : regionServers) {
+      AdminProtos.AdminService.BlockingInterface server = ((HConnection) connection).getAdmin(hsi);
+
+      // list all online regions from this region server
+      List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(server);
+      List<String> regionNames = new ArrayList<String>();
+      for (HRegionInfo hri : regions) {
+        regionNames.add(hri.getRegionNameAsString());
+      }
+      mm.put(hsi, regionNames);
+    }
+    return mm;
+  }
+
+  /**
+   * Returns the ServerName a region info is deployed on.
+   */
+  ServerName findDeployedHSI(Map<ServerName, List<String>> mm, HRegionInfo hri) {
+    for (Map.Entry<ServerName,List <String>> e : mm.entrySet()) {
+      if (e.getValue().contains(hri.getRegionNameAsString())) {
+        return e.getKey();
+      }
+    }
+    return null;
+  }
+
+
+
+
+  /**
+   * This creates and fixes a bad table with a missing region -- hole in meta
+   * and data present but .regioninfo missing (an orphan hdfs region) in the fs.
+   */
+  @Test (timeout=180000)
+  public void testHDFSRegioninfoMissing() throws Exception {
+    TableName table = TableName.valueOf("tableHDFSRegioninfoMissing");
+    try {
+      setupTable(table);
+      assertEquals(ROWKEYS.length, countRows());
+
+      // Mess it up by leaving a hole in the meta data
+      admin.disableTable(table);
+      deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"), Bytes.toBytes("C"), true,
+          true, false, true, HRegionInfo.DEFAULT_REPLICA_ID);
+      admin.enableTable(table);
+
+      HBaseFsck hbck = doFsck(conf, false);
+      assertErrors(hbck,
+          new ERROR_CODE[] { ERROR_CODE.ORPHAN_HDFS_REGION, ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
+              ERROR_CODE.HOLE_IN_REGION_CHAIN });
+      // holes are separate from overlap groups
+      assertEquals(0, hbck.getOverlapGroups(table).size());
+
+      // fix hole
+      doFsck(conf, true);
+
+      // check that hole fixed
+      assertNoErrors(doFsck(conf, false));
+      assertEquals(ROWKEYS.length, countRows());
+    } finally {
+      cleanupTable(table);
+    }
+  }
+
+  /**
+   * This creates and fixes a bad table with a region that is missing in meta and
+   * not assigned to a region server.
+   */
+  @Test (timeout=180000)
+  public void testNotInMetaOrDeployedHole() throws Exception {
+    TableName table =
+        TableName.valueOf("tableNotInMetaOrDeployedHole");
+    try {
+      setupTable(table);
+      assertEquals(ROWKEYS.length, countRows());
+
+      // Mess it up by leaving a hole in the meta data
+      admin.disableTable(table);
+      deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"), Bytes.toBytes("C"), true,
+          true, false); // don't rm from fs
+      admin.enableTable(table);
+
+      HBaseFsck hbck = doFsck(conf, false);
+      assertErrors(hbck,
+          new ERROR_CODE[] { ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN });
+      // holes are separate from overlap groups
+      assertEquals(0, hbck.getOverlapGroups(table).size());
+
+      // fix hole
+      assertErrors(doFsck(conf, true),
+          new ERROR_CODE[] { ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN });
+
+      // check that hole fixed
+      assertNoErrors(doFsck(conf, false));
+      assertEquals(ROWKEYS.length, countRows());
+    } finally {
+      cleanupTable(table);
+    }
+  }
+
+  @Test (timeout=180000)
+  public void testCleanUpDaughtersNotInMetaAfterFailedSplit() throws Exception {
+    TableName table = TableName.valueOf("testCleanUpDaughtersNotInMetaAfterFailedSplit");
+    MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
+    try {
+      HTableDescriptor desc = new HTableDescriptor(table);
+      desc.addFamily(new HColumnDescriptor(Bytes.toBytes("f")));
+      createTable(TEST_UTIL, desc, null);
+
+      tbl = connection.getTable(desc.getTableName());
+      for (int i = 0; i < 5; i++) {
+        Put p1 = new Put(("r" + i).getBytes());
+        p1.add(Bytes.toBytes("f"), "q1".getBytes(), "v".getBytes());
+        tbl.put(p1);
+      }
+      admin.flush(desc.getTableName());
+      List<HRegion> regions = cluster.getRegions(desc.getTableName());
+      int serverWith = cluster.getServerWith(regions.get(0).getRegionInfo().getRegionName());
+      HRegionServer regionServer = cluster.getRegionServer(serverWith);
+      cluster.getServerWith(regions.get(0).getRegionInfo().getRegionName());
+      SplitTransactionImpl st = (SplitTransactionImpl)
+        new SplitTransactionFactory(TEST_UTIL.getConfiguration())
+          .create(regions.get(0), Bytes.toBytes("r3"));
+      st.prepare();
+      st.stepsBeforePONR(regionServer, regionServer, false);
+      AssignmentManager am = cluster.getMaster().getAssignmentManager();
+      Map<String, RegionState> regionsInTransition = am.getRegionStates().getRegionsInTransition();
+      for (RegionState state : regionsInTransition.values()) {
+        am.regionOffline(state.getRegion());
+      }
+      Map<HRegionInfo, ServerName> regionsMap = new HashMap<HRegionInfo, ServerName>();
+      regionsMap.put(regions.get(0).getRegionInfo(), regionServer.getServerName());
+      am.assign(regionsMap);
+      am.waitForAssignment(regions.get(0).getRegionInfo());
+      HBaseFsck hbck = doFsck(conf, false);
+      assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
+          ERROR_CODE.NOT_IN_META_OR_DEPLOYED });
+      // holes are separate from overlap groups
+      assertEquals(0, hbck.getOverlapGroups(table).size());
+
+      // fix hole
+      assertErrors(
+          doFsck(conf, false, true, false, false, false, false, false, false, false, false, null),
+          new ERROR_CODE[] { ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
+              ERROR_CODE.NOT_IN_META_OR_DEPLOYED });
+
+      // check that hole fixed
+      assertNoErrors(doFsck(conf, false));
+      assertEquals(5, countRows());
+    } finally {
+      if (tbl != null) {
+        tbl.close();
+        tbl = null;
+      }
+      cleanupTable(table);
+    }
+  }
+
+  /**
+   * This creates and fixes a bad table with a hole in meta.
+   */
+  @Test (timeout=180000)
+  public void testNotInMetaHole() throws Exception {
+    TableName table =
+        TableName.valueOf("tableNotInMetaHole");
+    try {
+      setupTable(table);
+      assertEquals(ROWKEYS.length, countRows());
+
+      // Mess it up by leaving a hole in the meta data
+      admin.disableTable(table);
+      deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"), Bytes.toBytes("C"), false,
+          true, false); // don't rm from fs
+      admin.enableTable(table);
+
+      HBaseFsck hbck = doFsck(conf, false);
+      assertErrors(hbck,
+          new ERROR_CODE[] { ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN });
+      // holes are separate from overlap groups
+      assertEquals(0, hbck.getOverlapGroups(table).size());
+
+      // fix hole
+      assertErrors(doFsck(conf, true),
+          new ERROR_CODE[] { ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN });
+
+      // check that hole fixed
+      assertNoErrors(doFsck(conf, false));
+      assertEquals(ROWKEYS.length, countRows());
+    } finally {
+      cleanupTable(table);
+    }
+  }
+
+  /**
+   * This creates and fixes a bad table with a region that is in meta but has
+   * no deployment or data in hdfs.
+   */
+  @Test (timeout=180000)
+  public void testNotInHdfs() throws Exception {
+    TableName table =
+        TableName.valueOf("tableNotInHdfs");
+    try {
+      setupTable(table);
+      assertEquals(ROWKEYS.length, countRows());
+
+      // make sure data is in the region files; if it is only in the wal there is no data loss
+      admin.flush(table);
+
+      // Mess it up by leaving a hole in the hdfs data
+      deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"), Bytes.toBytes("C"), false,
+          false, true); // don't rm meta
+
+      HBaseFsck hbck = doFsck(conf, false);
+      assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.NOT_IN_HDFS});
+      // holes are separate from overlap groups
+      assertEquals(0, hbck.getOverlapGroups(table).size());
+
+      // fix hole
+      doFsck(conf, true);
+
+      // check that hole fixed
+      assertNoErrors(doFsck(conf,false));
+      assertEquals(ROWKEYS.length - 2, countRows());
+    } finally {
+      cleanupTable(table);
+    }
+  }
+
+
+  public void deleteTableDir(TableName table) throws IOException {
+    Path rootDir = FSUtils.getRootDir(conf);
+    FileSystem fs = rootDir.getFileSystem(conf);
+    Path p = FSUtils.getTableDir(rootDir, table);
+    HBaseFsck.debugLsr(conf, p);
+    boolean success = fs.delete(p, true);
+    LOG.info("Deleted " + p + " sucessfully? " + success);
+  }
+
+
+
+
+
+  /**
+   * We don't have an easy way to verify that a flush completed, so we loop until we find a
+   * legitimate hfile and return it.
+   * @param fs
+   * @param table
+   * @return Path of a flushed hfile.
+   * @throws IOException
+   */
+  Path getFlushedHFile(FileSystem fs, TableName table) throws IOException {
+    Path tableDir= FSUtils.getTableDir(FSUtils.getRootDir(conf), table);
+    Path regionDir = FSUtils.getRegionDirs(fs, tableDir).get(0);
+    Path famDir = new Path(regionDir, FAM_STR);
+
+    // keep doing this until we get a legit hfile
+    while (true) {
+      FileStatus[] hfFss = fs.listStatus(famDir);
+      if (hfFss.length == 0) {
+        continue;
+      }
+      for (FileStatus hfs : hfFss) {
+        if (!hfs.isDirectory()) {
+          return hfs.getPath();
+        }
+      }
+    }
+  }
+
+  /**
+   * Gets a flushed mob file.
+   * @param fs The current file system.
+   * @param table The current table name.
+   * @return Path of a flushed mob file.
+   * @throws IOException
+   */
+  Path getFlushedMobFile(FileSystem fs, TableName table) throws IOException {
+    Path regionDir = MobUtils.getMobRegionPath(conf, table);
+    Path famDir = new Path(regionDir, FAM_STR);
+
+    // keep doing this until we get a legit hfile
+    while (true) {
+      FileStatus[] hfFss = fs.listStatus(famDir);
+      if (hfFss.length == 0) {
+        continue;
+      }
+      for (FileStatus hfs : hfFss) {
+        if (!hfs.isDirectory()) {
+          return hfs.getPath();
+        }
+      }
+    }
+  }
+
+  /**
+   * Creates a new mob file name from the old one.
+   * @param oldFileName The old mob file name.
+   * @return The new mob file name.
+   */
+  String createMobFileName(String oldFileName) {
+    MobFileName mobFileName = MobFileName.create(oldFileName);
+    String startKey = mobFileName.getStartKey();
+    String date = mobFileName.getDate();
+    return MobFileName.create(startKey, date, UUID.randomUUID().toString().replaceAll("-", ""))
+      .getFileName();
+  }
+
+
+
+
+  /**
+   * Tests that use this should have a timeout, because this method could potentially wait forever.
+   */
+  protected void doQuarantineTest(TableName table, HBaseFsck hbck, int check,
+                                  int corrupt, int fail, int quar, int missing) throws Exception {
+    try {
+      setupTable(table);
+      assertEquals(ROWKEYS.length, countRows());
+      admin.flush(table); // flush is async.
+
+      // Mess it up by leaving a hole in the assignment, meta, and hdfs data
+      admin.disableTable(table);
+
+      String[] args = {"-sidelineCorruptHFiles", "-repairHoles", "-ignorePreCheckPermission",
+          table.getNameAsString()};
+      HBaseFsck res = hbck.exec(hbfsckExecutorService, args);
+
+      HFileCorruptionChecker hfcc = res.getHFilecorruptionChecker();
+      assertEquals(hfcc.getHFilesChecked(), check);
+      assertEquals(hfcc.getCorrupted().size(), corrupt);
+      assertEquals(hfcc.getFailures().size(), fail);
+      assertEquals(hfcc.getQuarantined().size(), quar);
+      assertEquals(hfcc.getMissing().size(), missing);
+
+      // it's been fixed, verify that we can enable
+      admin.enableTableAsync(table);
+      while (!admin.isTableEnabled(table)) {
+        try {
+          Thread.sleep(250);
+        } catch (InterruptedException e) {
+          e.printStackTrace();
+          fail("Interrupted when trying to enable table " + table);
+        }
+      }
+    } finally {
+      cleanupTable(table);
+    }
+  }
+
+  /**
+   * This creates a table and simulates the race situation where a concurrent compaction or split
+   * has removed a colfam dir before the corruption checker got to it.
+   */
+  // Disabled because it fails sporadically.  Is this test right?  Timing-wise, there could be no
+  // files in a column family on initial creation -- as suggested by Matteo.
+  @Ignore @Test(timeout=180000)
+  public void testQuarantineMissingFamdir() throws Exception {
+    TableName table = TableName.valueOf(name.getMethodName());
+    // inject a fault in the hfcc created.
+    final FileSystem fs = FileSystem.get(conf);
+    HBaseFsck hbck = new HBaseFsck(conf, hbfsckExecutorService) {
+      @Override
+      public HFileCorruptionChecker createHFileCorruptionChecker(boolean sidelineCorruptHFiles)
+          throws IOException {
+        return new HFileCorruptionChecker(conf, executor, sidelineCorruptHFiles) {
+          AtomicBoolean attemptedFirstHFile = new AtomicBoolean(false);
+          @Override
+          protected void checkColFamDir(Path p) throws IOException {
+            if (attemptedFirstHFile.compareAndSet(false, true)) {
+              assertTrue(fs.delete(p, true)); // make sure delete happened.
+            }
+            super.checkColFamDir(p);
+          }
+        };
+      }
+    };
+    doQuarantineTest(table, hbck, 3, 0, 0, 0, 1);
+    hbck.close();
+  }
+
+  /**
+   * This creates a table and simulates the race situation where a concurrent compaction or split
+   * has removed a region dir before the corruption checker got to it.
+   */
+  @Test(timeout=180000)
+  public void testQuarantineMissingRegionDir() throws Exception {
+    TableName table = TableName.valueOf(name.getMethodName());
+    // inject a fault in the hfcc created.
+    final FileSystem fs = FileSystem.get(conf);
+    HBaseFsck hbck = new HBaseFsck(conf, hbfsckExecutorService) {
+      @Override
+      public HFileCorruptionChecker createHFileCorruptionChecker(boolean sidelineCorruptHFiles)
+      throws IOException {
+        return new HFileCorruptionChecker(conf, executor, sidelineCorruptHFiles) {
+          AtomicBoolean attemptedFirstHFile = new AtomicBoolean(false);
+          @Override
+          protected void checkRegionDir(Path p) throws IOException {
+            if (attemptedFirstHFile.compareAndSet(false, true)) {
+              assertTrue(fs.delete(p, true)); // make sure delete happened.
+            }
+            super.checkRegionDir(p);
+          }
+        };
+      }
+    };
+    doQuarantineTest(table, hbck, 3, 0, 0, 0, 1);
+    hbck.close();
+  }
+
+
+
+
+
+  static class MockErrorReporter implements ErrorReporter {
+    static int calledCount = 0;
+
+    @Override
+    public void clear() {
+      calledCount++;
+    }
+
+    @Override
+    public void report(String message) {
+      calledCount++;
+    }
+
+    @Override
+    public void reportError(String message) {
+      calledCount++;
+    }
+
+    @Override
+    public void reportError(ERROR_CODE errorCode, String message) {
+      calledCount++;
+    }
+
+    @Override
+    public void reportError(ERROR_CODE errorCode, String message, TableInfo table) {
+      calledCount++;
+    }
+
+    @Override
+    public void reportError(ERROR_CODE errorCode,
+        String message, TableInfo table, HbckInfo info) {
+      calledCount++;
+    }
+
+    @Override
+    public void reportError(ERROR_CODE errorCode, String message,
+        TableInfo table, HbckInfo info1, HbckInfo info2) {
+      calledCount++;
+    }
+
+    @Override
+    public int summarize() {
+      return ++calledCount;
+    }
+
+    @Override
+    public void detail(String details) {
+      calledCount++;
+    }
+
+    @Override
+    public ArrayList<ERROR_CODE> getErrorList() {
+      calledCount++;
+      return new ArrayList<ERROR_CODE>();
+    }
+
+    @Override
+    public void progress() {
+      calledCount++;
+    }
+
+    @Override
+    public void print(String message) {
+      calledCount++;
+    }
+
+    @Override
+    public void resetErrors() {
+      calledCount++;
+    }
+
+    @Override
+    public boolean tableHasErrors(TableInfo table) {
+      calledCount++;
+      return false;
+    }
+  }
+
+
+  protected void deleteMetaRegion(Configuration conf, boolean unassign, boolean hdfs,
+                                  boolean regionInfoOnly) throws IOException, InterruptedException {
+    HRegionLocation metaLocation = connection.getRegionLocator(TableName.META_TABLE_NAME)
+        .getRegionLocation(HConstants.EMPTY_START_ROW);
+    ServerName hsa = metaLocation.getServerName();
+    HRegionInfo hri = metaLocation.getRegionInfo();
+    if (unassign) {
+      LOG.info("Undeploying meta region " + hri + " from server " + hsa);
+      try (Connection unmanagedConnection = ConnectionFactory.createConnection(conf)) {
+        undeployRegion(unmanagedConnection, hsa, hri);
+      }
+    }
+
+    if (regionInfoOnly) {
+      LOG.info("deleting hdfs .regioninfo data: " + hri.toString() + hsa.toString());
+      Path rootDir = FSUtils.getRootDir(conf);
+      FileSystem fs = rootDir.getFileSystem(conf);
+      Path p = new Path(rootDir + "/" + TableName.META_TABLE_NAME.getNameAsString(),
+          hri.getEncodedName());
+      Path hriPath = new Path(p, HRegionFileSystem.REGION_INFO_FILE);
+      fs.delete(hriPath, true);
+    }
+
+    if (hdfs) {
+      LOG.info("deleting hdfs data: " + hri.toString() + hsa.toString());
+      Path rootDir = FSUtils.getRootDir(conf);
+      FileSystem fs = rootDir.getFileSystem(conf);
+      Path p = new Path(rootDir + "/" + TableName.META_TABLE_NAME.getNameAsString(),
+          hri.getEncodedName());
+      HBaseFsck.debugLsr(conf, p);
+      boolean success = fs.delete(p, true);
+      LOG.info("Deleted " + p + " sucessfully? " + success);
+      HBaseFsck.debugLsr(conf, p);
+    }
+  }
+
+
+
+
+
+
+
+  @org.junit.Rule
+  public TestName name = new TestName();
+
+
+
+  public static class MasterSyncObserver extends BaseMasterObserver {
+    volatile CountDownLatch tableCreationLatch = null;
+    volatile CountDownLatch tableDeletionLatch = null;
+
+    @Override
+    public void postCreateTableHandler(final ObserverContext<MasterCoprocessorEnvironment> ctx,
+      HTableDescriptor desc, HRegionInfo[] regions) throws IOException {
+      // The AccessController test sometimes directly calls only postCreateTableHandler()
+      if (tableCreationLatch != null) {
+        tableCreationLatch.countDown();
+      }
+    }
+
+    @Override
+    public void postDeleteTableHandler(final ObserverContext<MasterCoprocessorEnvironment> ctx,
+                                       TableName tableName)
+    throws IOException {
+      // The AccessController test sometimes directly calls only postDeleteTableHandler()
+      if (tableDeletionLatch != null) {
+        tableDeletionLatch.countDown();
+      }
+    }
+  }
+
+  public static void createTable(HBaseTestingUtility testUtil, HTableDescriptor htd,
+    byte [][] splitKeys) throws Exception {
+    // NOTE: We need a latch because admin is not synchronous,
+    // so the postOp coprocessor method may be called after the admin operation has returned.
+    MasterSyncObserver observer = (MasterSyncObserver)testUtil.getHBaseCluster().getMaster()
+      .getMasterCoprocessorHost().findCoprocessor(MasterSyncObserver.class.getName());
+    observer.tableCreationLatch = new CountDownLatch(1);
+    if (splitKeys != null) {
+      admin.createTable(htd, splitKeys);
+    } else {
+      admin.createTable(htd);
+    }
+    observer.tableCreationLatch.await();
+    observer.tableCreationLatch = null;
+    testUtil.waitUntilAllRegionsAssigned(htd.getTableName());
+  }
+
+  public static void deleteTable(HBaseTestingUtility testUtil, TableName tableName)
+    throws Exception {
+    // NOTE: We need a latch because admin is not synchronous,
+    // so the postOp coprocessor method may be called after the admin operation has returned.
+    MasterSyncObserver observer = (MasterSyncObserver)testUtil.getHBaseCluster().getMaster()
+      .getMasterCoprocessorHost().findCoprocessor(MasterSyncObserver.class.getName());
+    observer.tableDeletionLatch = new CountDownLatch(1);
+    try {
+      admin.disableTable(tableName);
+    } catch (Exception e) {
+      LOG.debug("Table: " + tableName + " already disabled, so just deleting it.");
+    }
+    admin.deleteTable(tableName);
+    observer.tableDeletionLatch.await();
+    observer.tableDeletionLatch = null;
+  }
+}


[2/4] hbase git commit: HBASE-14570 Split TestHBaseFsck in order to help with hanging tests

Posted by ec...@apache.org.
http://git-wip-us.apache.org/repos/asf/hbase/blob/fbd2ed2e/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsckMOB.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsckMOB.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsckMOB.java
new file mode 100644
index 0000000..8e96f83
--- /dev/null
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsckMOB.java
@@ -0,0 +1,140 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.util;
+
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.ClusterConnection;
+import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
+import org.apache.hadoop.hbase.io.hfile.TestHFile;
+import org.apache.hadoop.hbase.master.AssignmentManager;
+import org.apache.hadoop.hbase.mob.MobUtils;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.testclassification.MiscTests;
+import org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker;
+import org.apache.hadoop.hbase.util.hbck.HbckTestingUtil;
+import org.junit.AfterClass;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+import java.util.concurrent.ScheduledThreadPoolExecutor;
+import java.util.concurrent.SynchronousQueue;
+import java.util.concurrent.ThreadPoolExecutor;
+import java.util.concurrent.TimeUnit;
+
+import static org.junit.Assert.assertEquals;
+
+@Category({MiscTests.class, LargeTests.class})
+public class TestHBaseFsckMOB extends BaseTestHBaseFsck {
+  @BeforeClass
+  public static void setUpBeforeClass() throws Exception {
+    TEST_UTIL.getConfiguration().set(CoprocessorHost.MASTER_COPROCESSOR_CONF_KEY,
+        MasterSyncObserver.class.getName());
+
+    conf.setInt("hbase.regionserver.handler.count", 2);
+    conf.setInt("hbase.regionserver.metahandler.count", 30);
+
+    conf.setInt("hbase.htable.threads.max", POOL_SIZE);
+    conf.setInt("hbase.hconnection.threads.max", 2 * POOL_SIZE);
+    conf.setInt("hbase.hconnection.threads.core", POOL_SIZE);
+    conf.setInt("hbase.hbck.close.timeout", 2 * REGION_ONLINE_TIMEOUT);
+    conf.setInt(HConstants.HBASE_RPC_TIMEOUT_KEY, 8 * REGION_ONLINE_TIMEOUT);
+    TEST_UTIL.startMiniCluster(1);
+
+    tableExecutorService = new ThreadPoolExecutor(1, POOL_SIZE, 60, TimeUnit.SECONDS,
+        new SynchronousQueue<Runnable>(), Threads.newDaemonThreadFactory("testhbck"));
+
+    hbfsckExecutorService = new ScheduledThreadPoolExecutor(POOL_SIZE);
+
+    AssignmentManager assignmentManager =
+        TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager();
+    regionStates = assignmentManager.getRegionStates();
+
+    connection = (ClusterConnection) TEST_UTIL.getConnection();
+
+    admin = connection.getAdmin();
+    admin.setBalancerRunning(false, true);
+
+    TEST_UTIL.waitUntilAllRegionsAssigned(TableName.META_TABLE_NAME);
+    TEST_UTIL.waitUntilAllRegionsAssigned(TableName.NAMESPACE_TABLE_NAME);
+  }
+
+  @AfterClass
+  public static void tearDownAfterClass() throws Exception {
+    tableExecutorService.shutdown();
+    hbfsckExecutorService.shutdown();
+    admin.close();
+    TEST_UTIL.shutdownMiniCluster();
+  }
+
+  @Before
+  public void setUp() {
+    EnvironmentEdgeManager.reset();
+  }
+
+
+  /**
+   * This creates a table and then corrupts a mob file.  Hbck should quarantine the file.
+   */
+  @Test(timeout=180000)
+  public void testQuarantineCorruptMobFile() throws Exception {
+    TableName table = TableName.valueOf(name.getMethodName());
+    try {
+      setupMobTable(table);
+      assertEquals(ROWKEYS.length, countRows());
+      admin.flush(table);
+
+      FileSystem fs = FileSystem.get(conf);
+      Path mobFile = getFlushedMobFile(fs, table);
+      admin.disableTable(table);
+      // create new corrupt mob file.
+      String corruptMobFile = createMobFileName(mobFile.getName());
+      Path corrupt = new Path(mobFile.getParent(), corruptMobFile);
+      TestHFile.truncateFile(fs, mobFile, corrupt);
+      LOG.info("Created corrupted mob file " + corrupt);
+      HBaseFsck.debugLsr(conf, FSUtils.getRootDir(conf));
+      HBaseFsck.debugLsr(conf, MobUtils.getMobHome(conf));
+
+      // A corrupt mob file doesn't abort the start of regions, so we can enable the table.
+      admin.enableTable(table);
+      HBaseFsck res = HbckTestingUtil.doHFileQuarantine(conf, table);
+      assertEquals(res.getRetCode(), 0);
+      HFileCorruptionChecker hfcc = res.getHFilecorruptionChecker();
+      assertEquals(hfcc.getHFilesChecked(), 4);
+      assertEquals(hfcc.getCorrupted().size(), 0);
+      assertEquals(hfcc.getFailures().size(), 0);
+      assertEquals(hfcc.getQuarantined().size(), 0);
+      assertEquals(hfcc.getMissing().size(), 0);
+      assertEquals(hfcc.getMobFilesChecked(), 5);
+      assertEquals(hfcc.getCorruptedMobFiles().size(), 1);
+      assertEquals(hfcc.getFailureMobFiles().size(), 0);
+      assertEquals(hfcc.getQuarantinedMobFiles().size(), 1);
+      assertEquals(hfcc.getMissedMobFiles().size(), 0);
+      String quarantinedMobFile = hfcc.getQuarantinedMobFiles().iterator().next().getName();
+      assertEquals(corruptMobFile, quarantinedMobFile);
+    } finally {
+      cleanupTable(table);
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/hbase/blob/fbd2ed2e/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsckOneRS.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsckOneRS.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsckOneRS.java
new file mode 100644
index 0000000..a44ccd1
--- /dev/null
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsckOneRS.java
@@ -0,0 +1,1477 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.util;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HColumnDescriptor;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.HRegionLocation;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.MetaTableAccessor;
+import org.apache.hadoop.hbase.MiniHBaseCluster;
+import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.ClusterConnection;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.client.Delete;
+import org.apache.hadoop.hbase.client.Get;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.RegionLocator;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
+import org.apache.hadoop.hbase.io.hfile.TestHFile;
+import org.apache.hadoop.hbase.master.AssignmentManager;
+import org.apache.hadoop.hbase.master.RegionStates;
+import org.apache.hadoop.hbase.master.TableLockManager;
+import org.apache.hadoop.hbase.regionserver.HRegion;
+import org.apache.hadoop.hbase.regionserver.HRegionServer;
+import org.apache.hadoop.hbase.regionserver.TestEndToEndSplitTransaction;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.testclassification.MiscTests;
+import org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker;
+import org.apache.hadoop.hbase.util.hbck.HbckTestingUtil;
+import org.junit.AfterClass;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.concurrent.Callable;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+import java.util.concurrent.ScheduledThreadPoolExecutor;
+import java.util.concurrent.SynchronousQueue;
+import java.util.concurrent.ThreadPoolExecutor;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicBoolean;
+
+import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.*;
+import static org.junit.Assert.*;
+
+@Category({MiscTests.class, LargeTests.class})
+public class TestHBaseFsckOneRS extends BaseTestHBaseFsck {
+
+  @BeforeClass
+  public static void setUpBeforeClass() throws Exception {
+    TEST_UTIL.getConfiguration().set(CoprocessorHost.MASTER_COPROCESSOR_CONF_KEY,
+        MasterSyncObserver.class.getName());
+
+    conf.setInt("hbase.regionserver.handler.count", 2);
+    conf.setInt("hbase.regionserver.metahandler.count", 30);
+
+    conf.setInt("hbase.htable.threads.max", POOL_SIZE);
+    conf.setInt("hbase.hconnection.threads.max", 2 * POOL_SIZE);
+    conf.setInt("hbase.hconnection.threads.core", POOL_SIZE);
+    conf.setInt("hbase.hbck.close.timeout", 2 * REGION_ONLINE_TIMEOUT);
+    conf.setInt(HConstants.HBASE_RPC_TIMEOUT_KEY, 8 * REGION_ONLINE_TIMEOUT);
+    TEST_UTIL.startMiniCluster(1);
+
+    tableExecutorService = new ThreadPoolExecutor(1, POOL_SIZE, 60, TimeUnit.SECONDS,
+        new SynchronousQueue<Runnable>(), Threads.newDaemonThreadFactory("testhbck"));
+
+    hbfsckExecutorService = new ScheduledThreadPoolExecutor(POOL_SIZE);
+
+    AssignmentManager assignmentManager =
+        TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager();
+    regionStates = assignmentManager.getRegionStates();
+
+    connection = (ClusterConnection) TEST_UTIL.getConnection();
+
+    admin = connection.getAdmin();
+    admin.setBalancerRunning(false, true);
+
+    TEST_UTIL.waitUntilAllRegionsAssigned(TableName.META_TABLE_NAME);
+    TEST_UTIL.waitUntilAllRegionsAssigned(TableName.NAMESPACE_TABLE_NAME);
+  }
+
+  @AfterClass
+  public static void tearDownAfterClass() throws Exception {
+    tableExecutorService.shutdown();
+    hbfsckExecutorService.shutdown();
+    admin.close();
+    TEST_UTIL.shutdownMiniCluster();
+  }
+
+  @Before
+  public void setUp() {
+    EnvironmentEdgeManager.reset();
+  }
+
+
+  /**
+   * This creates a clean table and confirms that the table is clean.
+   */
+  @Test(timeout=180000)
+  public void testHBaseFsckClean() throws Exception {
+    assertNoErrors(doFsck(conf, false));
+    TableName table = TableName.valueOf("tableClean");
+    try {
+      HBaseFsck hbck = doFsck(conf, false);
+      assertNoErrors(hbck);
+
+      setupTable(table);
+      assertEquals(ROWKEYS.length, countRows());
+
+      // We created 1 table, should be fine
+      hbck = doFsck(conf, false);
+      assertNoErrors(hbck);
+      assertEquals(0, hbck.getOverlapGroups(table).size());
+      assertEquals(ROWKEYS.length, countRows());
+    } finally {
+      cleanupTable(table);
+    }
+  }
+
+  /**
+   * Test thread pooling in the case where there are more regions than threads
+   */
+  @Test (timeout=180000)
+  public void testHbckThreadpooling() throws Exception {
+    TableName table =
+        TableName.valueOf("tableDupeStartKey");
+    try {
+      // Create table with 4 regions
+      setupTable(table);
+
+      // limit number of threads to 1.
+      Configuration newconf = new Configuration(conf);
+      newconf.setInt("hbasefsck.numthreads", 1);
+      assertNoErrors(doFsck(newconf, false));
+
+      // We should pass without triggering a RejectedExecutionException
+    } finally {
+      cleanupTable(table);
+    }
+  }
+
+  @Test (timeout=180000)
+  public void testTableWithNoRegions() throws Exception {
+    // We might end up with empty regions in a table
+    // see also testNoHdfsTable()
+    TableName table =
+        TableName.valueOf(name.getMethodName());
+    try {
+      // create table with one region
+      HTableDescriptor desc = new HTableDescriptor(table);
+      HColumnDescriptor hcd = new HColumnDescriptor(Bytes.toString(FAM));
+      desc.addFamily(hcd); // If a table has no CF's it doesn't get checked
+      createTable(TEST_UTIL, desc, null);
+      tbl = connection.getTable(table, tableExecutorService);
+
+      // Mess it up by leaving a hole in the assignment, meta, and hdfs data
+      deleteRegion(conf, tbl.getTableDescriptor(), HConstants.EMPTY_START_ROW,
+          HConstants.EMPTY_END_ROW, false, false, true);
+
+      HBaseFsck hbck = doFsck(conf, false);
+      assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_HDFS });
+
+      doFsck(conf, true);
+
+      // fix hole
+      doFsck(conf, true);
+
+      // check that hole fixed
+      assertNoErrors(doFsck(conf, false));
+    } finally {
+      cleanupTable(table);
+    }
+  }
+
+  @Test (timeout=180000)
+  public void testHbckFixOrphanTable() throws Exception {
+    TableName table = TableName.valueOf("tableInfo");
+    FileSystem fs = null;
+    Path tableinfo = null;
+    try {
+      setupTable(table);
+
+      Path hbaseTableDir = FSUtils.getTableDir(
+          FSUtils.getRootDir(conf), table);
+      fs = hbaseTableDir.getFileSystem(conf);
+      FileStatus status = FSTableDescriptors.getTableInfoPath(fs, hbaseTableDir);
+      tableinfo = status.getPath();
+      fs.rename(tableinfo, new Path("/.tableinfo"));
+
+      // to report an error if .tableinfo is missing.
+      HBaseFsck hbck = doFsck(conf, false);
+      assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.NO_TABLEINFO_FILE });
+
+      // fix OrphanTable with default .tableinfo (htd not yet cached on master)
+      hbck = doFsck(conf, true);
+      assertNoErrors(hbck);
+      status = null;
+      status = FSTableDescriptors.getTableInfoPath(fs, hbaseTableDir);
+      assertNotNull(status);
+
+      HTableDescriptor htd = admin.getTableDescriptor(table);
+      htd.setValue("NOT_DEFAULT", "true");
+      admin.disableTable(table);
+      admin.modifyTable(table, htd);
+      admin.enableTable(table);
+      fs.delete(status.getPath(), true);
+
+      // fix OrphanTable with cache
+      htd = admin.getTableDescriptor(table); // warms up cached htd on master
+      hbck = doFsck(conf, true);
+      assertNoErrors(hbck);
+      status = FSTableDescriptors.getTableInfoPath(fs, hbaseTableDir);
+      assertNotNull(status);
+      htd = admin.getTableDescriptor(table);
+      assertEquals(htd.getValue("NOT_DEFAULT"), "true");
+    } finally {
+      if (fs != null) {
+        fs.rename(new Path("/.tableinfo"), tableinfo);
+      }
+      cleanupTable(table);
+    }
+  }
+
+  @Test (timeout=180000)
+  public void testReadOnlyProperty() throws Exception {
+    HBaseFsck hbck = doFsck(conf, false);
+    Assert.assertEquals("shouldIgnorePreCheckPermission", true,
+        hbck.shouldIgnorePreCheckPermission());
+
+    hbck = doFsck(conf, true);
+    Assert.assertEquals("shouldIgnorePreCheckPermission", false,
+        hbck.shouldIgnorePreCheckPermission());
+
+    hbck = doFsck(conf, true);
+    hbck.setIgnorePreCheckPermission(true);
+    Assert.assertEquals("shouldIgnorePreCheckPermission", true,
+        hbck.shouldIgnorePreCheckPermission());
+  }
+
+  /**
+   * This creates and fixes a bad table where a region is completely contained
+   * by another region, and there is a hole (sort of like a bad split)
+   */
+  @Test (timeout=180000)
+  public void testOverlapAndOrphan() throws Exception {
+    TableName table =
+        TableName.valueOf("tableOverlapAndOrphan");
+    try {
+      setupTable(table);
+      assertEquals(ROWKEYS.length, countRows());
+
+      // Mess it up by creating an overlap in the metadata
+      admin.disableTable(table);
+      deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"), Bytes.toBytes("B"), true,
+          true, false, true, HRegionInfo.DEFAULT_REPLICA_ID);
+      admin.enableTable(table);
+
+      HRegionInfo hriOverlap =
+          createRegion(tbl.getTableDescriptor(), Bytes.toBytes("A2"), Bytes.toBytes("B"));
+      TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriOverlap);
+      TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
+          .waitForAssignment(hriOverlap);
+      ServerName server = regionStates.getRegionServerOfRegion(hriOverlap);
+      TEST_UTIL.assertRegionOnServer(hriOverlap, server, REGION_ONLINE_TIMEOUT);
+
+      HBaseFsck hbck = doFsck(conf, false);
+      assertErrors(hbck,
+          new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.ORPHAN_HDFS_REGION, HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
+              HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN });
+
+      // fix the problem.
+      doFsck(conf, true);
+
+      // verify that overlaps are fixed
+      HBaseFsck hbck2 = doFsck(conf,false);
+      assertNoErrors(hbck2);
+      assertEquals(0, hbck2.getOverlapGroups(table).size());
+      assertEquals(ROWKEYS.length, countRows());
+    } finally {
+      cleanupTable(table);
+    }
+  }
+
+  /**
+   * This creates and fixes a bad table where a region overlaps two regions --
+   * its start key is contained in another region and its end key is contained in
+   * yet another region.
+   */
+  @Test (timeout=180000)
+  public void testCoveredStartKey() throws Exception {
+    TableName table =
+        TableName.valueOf("tableCoveredStartKey");
+    try {
+      setupTable(table);
+      assertEquals(ROWKEYS.length, countRows());
+
+      // Mess it up by creating an overlap in the metadata
+      HRegionInfo hriOverlap =
+          createRegion(tbl.getTableDescriptor(), Bytes.toBytes("A2"), Bytes.toBytes("B2"));
+      TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriOverlap);
+      TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
+          .waitForAssignment(hriOverlap);
+      ServerName server = regionStates.getRegionServerOfRegion(hriOverlap);
+      TEST_UTIL.assertRegionOnServer(hriOverlap, server, REGION_ONLINE_TIMEOUT);
+
+      HBaseFsck hbck = doFsck(conf, false);
+      assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.OVERLAP_IN_REGION_CHAIN,
+          HBaseFsck.ErrorReporter.ERROR_CODE.OVERLAP_IN_REGION_CHAIN });
+      assertEquals(3, hbck.getOverlapGroups(table).size());
+      assertEquals(ROWKEYS.length, countRows());
+
+      // fix the problem.
+      doFsck(conf, true);
+
+      // verify that overlaps are fixed
+      HBaseFsck hbck2 = doFsck(conf, false);
+      assertErrors(hbck2, new HBaseFsck.ErrorReporter.ERROR_CODE[0]);
+      assertEquals(0, hbck2.getOverlapGroups(table).size());
+      assertEquals(ROWKEYS.length, countRows());
+    } finally {
+      cleanupTable(table);
+    }
+  }
+
+  /**
+   * This creates and fixes a bad table with a missing region -- hole in meta
+   * and data missing in the fs.
+   */
+  @Test (timeout=180000)
+  public void testRegionHole() throws Exception {
+    TableName table =
+        TableName.valueOf("tableRegionHole");
+    try {
+      setupTable(table);
+      assertEquals(ROWKEYS.length, countRows());
+
+      // Mess it up by leaving a hole in the assignment, meta, and hdfs data
+      admin.disableTable(table);
+      deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"), Bytes.toBytes("C"), true,
+          true, true);
+      admin.enableTable(table);
+
+      HBaseFsck hbck = doFsck(conf, false);
+      assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN });
+      // holes are separate from overlap groups
+      assertEquals(0, hbck.getOverlapGroups(table).size());
+
+      // fix hole
+      doFsck(conf, true);
+
+      // check that hole fixed
+      assertNoErrors(doFsck(conf,false));
+      assertEquals(ROWKEYS.length - 2, countRows()); // lost a region so lost a row
+    } finally {
+      cleanupTable(table);
+    }
+  }
+
+  /**
+   * The region is not deployed when the table is disabled.
+   */
+  @Test (timeout=180000)
+  public void testRegionShouldNotBeDeployed() throws Exception {
+    TableName table =
+        TableName.valueOf("tableRegionShouldNotBeDeployed");
+    try {
+      LOG.info("Starting testRegionShouldNotBeDeployed.");
+      MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
+      assertTrue(cluster.waitForActiveAndReadyMaster());
+
+
+      byte[][] SPLIT_KEYS = new byte[][] { new byte[0], Bytes.toBytes("aaa"),
+          Bytes.toBytes("bbb"), Bytes.toBytes("ccc"), Bytes.toBytes("ddd") };
+      HTableDescriptor htdDisabled = new HTableDescriptor(table);
+      htdDisabled.addFamily(new HColumnDescriptor(FAM));
+
+      // Write the .tableinfo
+      FSTableDescriptors fstd = new FSTableDescriptors(conf);
+      fstd.createTableDescriptor(htdDisabled);
+      List<HRegionInfo> disabledRegions =
+          TEST_UTIL.createMultiRegionsInMeta(conf, htdDisabled, SPLIT_KEYS);
+
+      // Let's just assign everything to first RS
+      HRegionServer hrs = cluster.getRegionServer(0);
+
+      // Create region files.
+      admin.disableTable(table);
+      admin.enableTable(table);
+
+      // Disable the table and close its regions
+      admin.disableTable(table);
+      HRegionInfo region = disabledRegions.remove(0);
+      byte[] regionName = region.getRegionName();
+
+      // The region should not be assigned currently
+      assertTrue(cluster.getServerWith(regionName) == -1);
+
+      // Directly open a region on a region server.
+      // If going through AM/ZK, the region won't be open.
+      // Even if it is opened, AM will close it, which causes
+      // flakiness of this test.
+      HRegion r = HRegion.openHRegion(
+          region, htdDisabled, hrs.getWAL(region), conf);
+      hrs.addToOnlineRegions(r);
+
+      HBaseFsck hbck = doFsck(conf, false);
+      assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.SHOULD_NOT_BE_DEPLOYED });
+
+      // fix this fault
+      doFsck(conf, true);
+
+      // check result
+      assertNoErrors(doFsck(conf, false));
+    } finally {
+      admin.enableTable(table);
+      cleanupTable(table);
+    }
+  }
+
+  /**
+   * This test makes sure that running parallel instances of Hbck is disallowed.
+   *
+   * @throws Exception
+   */
+  @Test(timeout=180000)
+  public void testParallelHbck() throws Exception {
+    final ExecutorService service;
+    final Future<HBaseFsck> hbck1,hbck2;
+
+    class RunHbck implements Callable<HBaseFsck> {
+      boolean fail = true;
+      @Override
+      public HBaseFsck call(){
+        Configuration c = new Configuration(conf);
+        c.setInt("hbase.hbck.lockfile.attempts", 1);
+        // HBASE-13574 found that on Hadoop 2.6 and later, the lock file create call internally retries.
+        // To avoid flakiness of the test, set a low max wait time.
+        c.setInt("hbase.hbck.lockfile.maxwaittime", 3);
+        try{
+          return doFsck(c, true); // Exclusive hbck only when fixing
+        } catch(Exception e){
+          if (e.getMessage().contains("Duplicate hbck")) {
+            fail = false;
+          }
+        }
+        // If we reach here, then an exception was caught
+        if (fail) fail();
+        return null;
+      }
+    }
+    service = Executors.newFixedThreadPool(2);
+    hbck1 = service.submit(new RunHbck());
+    hbck2 = service.submit(new RunHbck());
+    service.shutdown();
+    // wait up to 15 seconds for both hbck calls to finish
+    service.awaitTermination(15, TimeUnit.SECONDS);
+    HBaseFsck h1 = hbck1.get();
+    HBaseFsck h2 = hbck2.get();
+    // Make sure only one of the calls was successful
+    assert(h1 == null || h2 == null);
+    if (h1 != null) {
+      assert(h1.getRetCode() >= 0);
+    }
+    if (h2 != null) {
+      assert(h2.getRetCode() >= 0);
+    }
+  }
+
+  /**
+   * This test makes sure that, with enough retries, both parallel instances
+   * of hbck complete successfully.
+   *
+   * @throws Exception
+   */
+  @Test (timeout=180000)
+  public void testParallelWithRetriesHbck() throws Exception {
+    final ExecutorService service;
+    final Future<HBaseFsck> hbck1,hbck2;
+
+    // With the ExponentialBackoffPolicyWithLimit (starting with a 200 millisecond sleep time and
+    // a capped max sleep time), we can retry around 15 times within 80 seconds before bailing out.
+    //
+    // Note: the reason to use 80 seconds is that on Hadoop 2.6 and later, the file create call
+    // retries for up to HdfsConstants.LEASE_SOFTLIMIT_PERIOD (60 seconds).  See HBASE-13574 for
+    // more details.
+    final int timeoutInSeconds = 80;
+    final int sleepIntervalInMilliseconds = 200;
+    final int maxSleepTimeInMilliseconds = 6000;
+    final int maxRetryAttempts = 15;
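+    // Rough timing sketch (assuming the sleep time doubles each attempt up to the configured cap):
+    // the first few sleeps take about 0.2 + 0.4 + 0.8 + 1.6 + 3.2 seconds and later ones about
+    // 6 seconds each, which keeps ~15 attempts inside the 80 second lock wait window above.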
+
+    class RunHbck implements Callable<HBaseFsck>{
+
+      @Override
+      public HBaseFsck call() throws Exception {
+        // Increase retry attempts to make sure the non-active hbck doesn't get starved
+        Configuration c = new Configuration(conf);
+        c.setInt("hbase.hbck.lockfile.maxwaittime", timeoutInSeconds);
+        c.setInt("hbase.hbck.lockfile.attempt.sleep.interval", sleepIntervalInMilliseconds);
+        c.setInt("hbase.hbck.lockfile.attempt.maxsleeptime", maxSleepTimeInMilliseconds);
+        c.setInt("hbase.hbck.lockfile.attempts", maxRetryAttempts);
+        return doFsck(c, false);
+      }
+    }
+
+    service = Executors.newFixedThreadPool(2);
+    hbck1 = service.submit(new RunHbck());
+    hbck2 = service.submit(new RunHbck());
+    service.shutdown();
+    // wait for both hbck calls to finish
+    service.awaitTermination(timeoutInSeconds * 2, TimeUnit.SECONDS);
+    HBaseFsck h1 = hbck1.get();
+    HBaseFsck h2 = hbck2.get();
+    // Both should be successful
+    assertNotNull(h1);
+    assertNotNull(h2);
+    assert(h1.getRetCode() >= 0);
+    assert(h2.getRetCode() >= 0);
+
+  }
+
+  @Test (timeout = 180000)
+  public void testRegionBoundariesCheck() throws Exception {
+    HBaseFsck hbck = doFsck(conf, false);
+    assertNoErrors(hbck); // no errors
+    try {
+      hbck.connect(); // need connection to have access to META
+      hbck.checkRegionBoundaries();
+    } catch (IllegalArgumentException e) {
+      if (e.getMessage().endsWith("not a valid DFS filename.")) {
+        fail("Table directory path is not valid." + e.getMessage());
+      }
+    } finally {
+      hbck.close();
+    }
+  }
+
+  @Test (timeout=180000)
+  public void testHbckAfterRegionMerge() throws Exception {
+    TableName table = TableName.valueOf("testMergeRegionFilesInHdfs");
+    Table meta = null;
+    try {
+      // disable CatalogJanitor
+      TEST_UTIL.getHBaseCluster().getMaster().setCatalogJanitorEnabled(false);
+      setupTable(table);
+      assertEquals(ROWKEYS.length, countRows());
+
+      try(RegionLocator rl = connection.getRegionLocator(tbl.getName())) {
+        // make sure data in regions, if in wal only there is no data loss
+        admin.flush(table);
+        HRegionInfo region1 = rl.getRegionLocation(Bytes.toBytes("A")).getRegionInfo();
+        HRegionInfo region2 = rl.getRegionLocation(Bytes.toBytes("B")).getRegionInfo();
+
+        int regionCountBeforeMerge = rl.getAllRegionLocations().size();
+
+        assertNotEquals(region1, region2);
+
+        // do a region merge
+        admin.mergeRegions(region1.getEncodedNameAsBytes(), region2.getEncodedNameAsBytes(), false);
+
+        // wait until region merged
+        long timeout = System.currentTimeMillis() + 30 * 1000;
+        while (true) {
+          if (rl.getAllRegionLocations().size() < regionCountBeforeMerge) {
+            break;
+          } else if (System.currentTimeMillis() > timeout) {
+            fail("Time out waiting on region " + region1.getEncodedName() + " and " + region2
+                .getEncodedName() + " be merged");
+          }
+          Thread.sleep(10);
+        }
+
+        assertEquals(ROWKEYS.length, countRows());
+
+        HBaseFsck hbck = doFsck(conf, false);
+        assertNoErrors(hbck); // no errors
+      }
+
+    } finally {
+      TEST_UTIL.getHBaseCluster().getMaster().setCatalogJanitorEnabled(true);
+      cleanupTable(table);
+      IOUtils.closeQuietly(meta);
+    }
+  }
+
+  /**
+   * This creates entries in hbase:meta with no hdfs data.  This should cleanly
+   * remove the table.
+   */
+  @Test (timeout=180000)
+  public void testNoHdfsTable() throws Exception {
+    TableName table = TableName.valueOf("NoHdfsTable");
+    setupTable(table);
+    assertEquals(ROWKEYS.length, countRows());
+
+    // make sure data in regions, if in wal only there is no data loss
+    admin.flush(table);
+
+    // Mess it up by deleting hdfs dirs
+    deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes(""),
+        Bytes.toBytes("A"), false, false, true); // don't rm meta
+    deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"),
+        Bytes.toBytes("B"), false, false, true); // don't rm meta
+    deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
+        Bytes.toBytes("C"), false, false, true); // don't rm meta
+    deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("C"),
+        Bytes.toBytes(""), false, false, true); // don't rm meta
+
+    // also remove the table directory in hdfs
+    deleteTableDir(table);
+
+    HBaseFsck hbck = doFsck(conf, false);
+    assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_HDFS,
+        HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_HDFS, HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_HDFS,
+        HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_HDFS, HBaseFsck.ErrorReporter.ERROR_CODE.ORPHAN_TABLE_STATE, });
+    // holes are separate from overlap groups
+    assertEquals(0, hbck.getOverlapGroups(table).size());
+
+    // fix hole
+    doFsck(conf, true); // detect dangling regions and remove those
+
+    // check that hole fixed
+    assertNoErrors(doFsck(conf,false));
+    assertFalse("Table " + table + " should have been deleted", admin.tableExists(table));
+  }
+
+  /**
+   * When the hbase.version file is missing, hbck fixes the fault.
+   */
+  @Test (timeout=180000)
+  public void testNoVersionFile() throws Exception {
+    // delete the hbase.version file
+    Path rootDir = FSUtils.getRootDir(conf);
+    FileSystem fs = rootDir.getFileSystem(conf);
+    Path versionFile = new Path(rootDir, HConstants.VERSION_FILE_NAME);
+    fs.delete(versionFile, true);
+
+    // test
+    HBaseFsck hbck = doFsck(conf, false);
+    assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.NO_VERSION_FILE });
+    // fix hbase.version missing
+    doFsck(conf, true);
+
+    // no version file fixed
+    assertNoErrors(doFsck(conf, false));
+  }
+
+  @Test (timeout=180000)
+  public void testNoTableState() throws Exception {
+    // delete the table state from hbase:meta and verify hbck restores it
+    TableName table =
+        TableName.valueOf("testNoTableState");
+    try {
+      setupTable(table);
+      // make sure data in regions, if in wal only there is no data loss
+      admin.flush(table);
+
+      MetaTableAccessor.deleteTableState(TEST_UTIL.getConnection(), table);
+
+      // test
+      HBaseFsck hbck = doFsck(conf, false);
+      assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.NO_TABLE_STATE });
+      // fix table state missing
+      doFsck(conf, true);
+
+      assertNoErrors(doFsck(conf, false));
+      assertTrue(TEST_UTIL.getHBaseAdmin().isTableEnabled(table));
+    } finally {
+      cleanupTable(table);
+    }
+  }
+
+  /**
+   * This creates two tables, messes up both of them, and fixes them one by one.
+   */
+  @Test (timeout=180000)
+  public void testFixByTable() throws Exception {
+    TableName table1 =
+        TableName.valueOf("testFixByTable1");
+    TableName table2 =
+        TableName.valueOf("testFixByTable2");
+    try {
+      setupTable(table1);
+      // make sure data in regions, if in wal only there is no data loss
+      admin.flush(table1);
+      // Mess them up by leaving a hole in the hdfs data
+      deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
+          Bytes.toBytes("C"), false, false, true); // don't rm meta
+
+      setupTable(table2);
+      // make sure data in regions, if in wal only there is no data loss
+      admin.flush(table2);
+      // Mess them up by leaving a hole in the hdfs data
+      deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"), Bytes.toBytes("C"), false,
+          false, true); // don't rm meta
+
+      HBaseFsck hbck = doFsck(conf, false);
+      assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_HDFS, HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_HDFS });
+
+      // fix hole in table 1
+      doFsck(conf, true, table1);
+      // check that hole in table 1 fixed
+      assertNoErrors(doFsck(conf, false, table1));
+      // check that hole in table 2 still there
+      assertErrors(doFsck(conf, false, table2), new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_HDFS });
+
+      // fix hole in table 2
+      doFsck(conf, true, table2);
+      // check that hole in both tables fixed
+      assertNoErrors(doFsck(conf, false));
+      assertEquals(ROWKEYS.length - 2, countRows());
+    } finally {
+      cleanupTable(table1);
+      cleanupTable(table2);
+    }
+  }
+
+  /**
+   * A split parent in meta, in hdfs, and not deployed
+   */
+  @Test (timeout=180000)
+  public void testLingeringSplitParent() throws Exception {
+    TableName table =
+        TableName.valueOf("testLingeringSplitParent");
+    Table meta = null;
+    try {
+      setupTable(table);
+      assertEquals(ROWKEYS.length, countRows());
+
+      // make sure data in regions, if in wal only there is no data loss
+      admin.flush(table);
+
+      HRegionLocation location;
+      try(RegionLocator rl = connection.getRegionLocator(tbl.getName())) {
+        location = rl.getRegionLocation(Bytes.toBytes("B"));
+      }
+
+      // Delete one region from meta, but not hdfs, unassign it.
+      deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
+          Bytes.toBytes("C"), true, true, false);
+
+      // Create a new meta entry to fake it as a split parent.
+      meta = connection.getTable(TableName.META_TABLE_NAME, tableExecutorService);
+      HRegionInfo hri = location.getRegionInfo();
+
+      HRegionInfo a = new HRegionInfo(tbl.getName(),
+          Bytes.toBytes("B"), Bytes.toBytes("BM"));
+      HRegionInfo b = new HRegionInfo(tbl.getName(),
+          Bytes.toBytes("BM"), Bytes.toBytes("C"));
+
+      hri.setOffline(true);
+      hri.setSplit(true);
+
+      MetaTableAccessor.addRegionToMeta(meta, hri, a, b);
+      meta.close();
+      admin.flush(TableName.META_TABLE_NAME);
+
+      HBaseFsck hbck = doFsck(conf, false);
+      assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
+          HBaseFsck.ErrorReporter.ERROR_CODE.LINGERING_SPLIT_PARENT, HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN});
+
+      // regular repair cannot fix lingering split parent
+      hbck = doFsck(conf, true);
+      assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
+          HBaseFsck.ErrorReporter.ERROR_CODE.LINGERING_SPLIT_PARENT, HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN });
+      assertFalse(hbck.shouldRerun());
+      hbck = doFsck(conf, false);
+      assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
+          HBaseFsck.ErrorReporter.ERROR_CODE.LINGERING_SPLIT_PARENT, HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN});
+
+      // fix lingering split parent
+      hbck = new HBaseFsck(conf, hbfsckExecutorService);
+      hbck.connect();
+      HBaseFsck.setDisplayFullReport(); // i.e. -details
+      hbck.setTimeLag(0);
+      hbck.setFixSplitParents(true);
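+      // With fixSplitParents enabled, the lingering parent is expected to be reset (its
+      // SPLITA/SPLITB references removed from the hbase:meta row), which the asserts below verify.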
+      hbck.onlineHbck();
+      assertTrue(hbck.shouldRerun());
+      hbck.close();
+
+      Get get = new Get(hri.getRegionName());
+      Result result = meta.get(get);
+      assertTrue(result.getColumnCells(HConstants.CATALOG_FAMILY,
+          HConstants.SPLITA_QUALIFIER).isEmpty());
+      assertTrue(result.getColumnCells(HConstants.CATALOG_FAMILY,
+          HConstants.SPLITB_QUALIFIER).isEmpty());
+      admin.flush(TableName.META_TABLE_NAME);
+
+      // fix other issues
+      doFsck(conf, true);
+
+      // check that all are fixed
+      assertNoErrors(doFsck(conf, false));
+      assertEquals(ROWKEYS.length, countRows());
+    } finally {
+      cleanupTable(table);
+      IOUtils.closeQuietly(meta);
+    }
+  }
+
+  /**
+   * Tests that LINGERING_SPLIT_PARENT is not erroneously reported for
+   * valid cases where the daughters are there.
+   */
+  @Test (timeout=180000)
+  public void testValidLingeringSplitParent() throws Exception {
+    TableName table =
+        TableName.valueOf("testLingeringSplitParent");
+    Table meta = null;
+    try {
+      setupTable(table);
+      assertEquals(ROWKEYS.length, countRows());
+
+      // make sure data in regions, if in wal only there is no data loss
+      admin.flush(table);
+
+      try(RegionLocator rl = connection.getRegionLocator(tbl.getName())) {
+        HRegionLocation location = rl.getRegionLocation(Bytes.toBytes("B"));
+
+        meta = connection.getTable(TableName.META_TABLE_NAME, tableExecutorService);
+        HRegionInfo hri = location.getRegionInfo();
+
+        // do a regular split
+        byte[] regionName = location.getRegionInfo().getRegionName();
+        admin.splitRegion(location.getRegionInfo().getRegionName(), Bytes.toBytes("BM"));
+        TestEndToEndSplitTransaction.blockUntilRegionSplit(conf, 60000, regionName, true);
+
+        // TODO: fixHdfsHoles does not work against splits, since the parent dir lingers on
+        // for some time until children references are deleted. HBCK erroneously sees this as
+        // overlapping regions
+        HBaseFsck hbck = doFsck(conf, true, true, false, false, false, true, true, true, false,
+            false, null);
+        assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {}); //no LINGERING_SPLIT_PARENT reported
+
+        // assert that the split hbase:meta entry is still there.
+        Get get = new Get(hri.getRegionName());
+        Result result = meta.get(get);
+        assertNotNull(result);
+        assertNotNull(MetaTableAccessor.getHRegionInfo(result));
+
+        assertEquals(ROWKEYS.length, countRows());
+
+        // assert that we still have the split regions
+        assertEquals(rl.getStartKeys().length, SPLITS.length + 1 + 1); //SPLITS + 1 is # regions
+        // pre-split.
+        assertNoErrors(doFsck(conf, false));
+      }
+    } finally {
+      cleanupTable(table);
+      IOUtils.closeQuietly(meta);
+    }
+  }
+
+  /**
+   * Split crashed after write to hbase:meta finished for the parent region, but
+   * failed to write daughters (pre HBASE-7721 codebase)
+   */
+  @Test(timeout=75000)
+  public void testSplitDaughtersNotInMeta() throws Exception {
+    TableName table = TableName.valueOf("testSplitdaughtersNotInMeta");
+    Table meta = connection.getTable(TableName.META_TABLE_NAME, tableExecutorService);
+    try {
+      setupTable(table);
+      assertEquals(ROWKEYS.length, countRows());
+
+      // make sure data in regions, if in wal only there is no data loss
+      admin.flush(table);
+
+      try(RegionLocator rl = connection.getRegionLocator(tbl.getName())) {
+        HRegionLocation location = rl.getRegionLocation(Bytes.toBytes("B"));
+
+        HRegionInfo hri = location.getRegionInfo();
+
+        // do a regular split
+        byte[] regionName = location.getRegionInfo().getRegionName();
+        admin.splitRegion(location.getRegionInfo().getRegionName(), Bytes.toBytes("BM"));
+        TestEndToEndSplitTransaction.blockUntilRegionSplit(conf, 60000, regionName, true);
+
+        PairOfSameType<HRegionInfo> daughters = MetaTableAccessor.getDaughterRegions(
+            meta.get(new Get(regionName)));
+
+        // Delete daughter regions from meta, but not hdfs, unassign it.
+
+        ServerName firstSN =
+            rl.getRegionLocation(daughters.getFirst().getStartKey()).getServerName();
+        ServerName secondSN =
+            rl.getRegionLocation(daughters.getSecond().getStartKey()).getServerName();
+
+        undeployRegion(connection, firstSN, daughters.getFirst());
+        undeployRegion(connection, secondSN, daughters.getSecond());
+
+        List<Delete> deletes = new ArrayList<>();
+        deletes.add(new Delete(daughters.getFirst().getRegionName()));
+        deletes.add(new Delete(daughters.getSecond().getRegionName()));
+        meta.delete(deletes);
+
+        // Remove daughters from regionStates
+        RegionStates regionStates = TEST_UTIL.getMiniHBaseCluster().getMaster().
+            getAssignmentManager().getRegionStates();
+        regionStates.deleteRegion(daughters.getFirst());
+        regionStates.deleteRegion(daughters.getSecond());
+
+        HBaseFsck hbck = doFsck(conf, false);
+        assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
+            HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
+            HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
+            HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN }); //no LINGERING_SPLIT_PARENT
+
+        // now fix it. The fix should not revert the region split, but add daughters to META
+        hbck = doFsck(conf, true, true, false, false, false, false, false, false, false,
+            false, null);
+        assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
+            HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
+            HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
+            HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN });
+
+        // assert that the split hbase:meta entry is still there.
+        Get get = new Get(hri.getRegionName());
+        Result result = meta.get(get);
+        assertNotNull(result);
+        assertNotNull(MetaTableAccessor.getHRegionInfo(result));
+
+        assertEquals(ROWKEYS.length, countRows());
+
+        // assert that we still have the split regions
+        assertEquals(rl.getStartKeys().length, SPLITS.length + 1 + 1); //SPLITS + 1 is # regions
+        // pre-split.
+        assertNoErrors(doFsck(conf, false)); //should be fixed by now
+      }
+    } finally {
+      meta.close();
+      cleanupTable(table);
+    }
+  }
+
+  /**
+   * This creates and fixes a bad table with a missing region which is the 1st region -- hole in
+   * meta and data missing in the fs.
+   */
+  @Test(timeout=120000)
+  public void testMissingFirstRegion() throws Exception {
+    TableName table = TableName.valueOf("testMissingFirstRegion");
+    try {
+      setupTable(table);
+      assertEquals(ROWKEYS.length, countRows());
+
+      // Mess it up by leaving a hole in the assignment, meta, and hdfs data
+      admin.disableTable(table);
+      deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes(""), Bytes.toBytes("A"), true,
+          true, true);
+      admin.enableTable(table);
+
+      HBaseFsck hbck = doFsck(conf, false);
+      assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.FIRST_REGION_STARTKEY_NOT_EMPTY });
+      // fix hole
+      doFsck(conf, true);
+      // check that hole fixed
+      assertNoErrors(doFsck(conf, false));
+    } finally {
+      cleanupTable(table);
+    }
+  }
+
+  /**
+   * This creates and fixes a bad table with a region that is deployed and present in meta but
+   * whose data is missing from the fs.
+   */
+  @Test(timeout=120000)
+  public void testRegionDeployedNotInHdfs() throws Exception {
+    TableName table =
+        TableName.valueOf("testSingleRegionDeployedNotInHdfs");
+    try {
+      setupTable(table);
+      admin.flush(table);
+
+      // Mess it up by deleting region dir
+      deleteRegion(conf, tbl.getTableDescriptor(),
+          HConstants.EMPTY_START_ROW, Bytes.toBytes("A"), false,
+          false, true);
+
+      HBaseFsck hbck = doFsck(conf, false);
+      assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_HDFS });
+      // fix hole
+      doFsck(conf, true);
+      // check that hole fixed
+      assertNoErrors(doFsck(conf, false));
+    } finally {
+      cleanupTable(table);
+    }
+  }
+
+  /**
+   * This creates and fixes a bad table with a missing last region -- hole in meta and data missing in
+   * the fs.
+   */
+  @Test(timeout=120000)
+  public void testMissingLastRegion() throws Exception {
+    TableName table =
+        TableName.valueOf("testMissingLastRegion");
+    try {
+      setupTable(table);
+      assertEquals(ROWKEYS.length, countRows());
+
+      // Mess it up by leaving a hole in the assignment, meta, and hdfs data
+      admin.disableTable(table);
+      deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("C"), Bytes.toBytes(""), true,
+          true, true);
+      admin.enableTable(table);
+
+      HBaseFsck hbck = doFsck(conf, false);
+      assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.LAST_REGION_ENDKEY_NOT_EMPTY });
+      // fix hole
+      doFsck(conf, true);
+      // check that hole fixed
+      assertNoErrors(doFsck(conf, false));
+    } finally {
+      cleanupTable(table);
+    }
+  }
+
+  /**
+   * Test that the -noHdfsChecking option can detect and fix assignment issues.
+   */
+  @Test (timeout=180000)
+  public void testFixAssignmentsAndNoHdfsChecking() throws Exception {
+    TableName table =
+        TableName.valueOf("testFixAssignmentsAndNoHdfsChecking");
+    try {
+      setupTable(table);
+      assertEquals(ROWKEYS.length, countRows());
+
+      // Mess it up by closing a region
+      deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"), Bytes.toBytes("B"), true,
+          false, false, false, HRegionInfo.DEFAULT_REPLICA_ID);
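+      // This longer deleteRegion overload appears to add two more parameters: a flag for removing
+      // only the .regioninfo file and the replica id to act on; here only the unassign flag is
+      // true, so the region is closed while its meta row and HDFS data are left intact.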
+
+      // verify there are no other errors
+      HBaseFsck hbck = doFsck(conf, false);
+      assertErrors(hbck,
+          new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.NOT_DEPLOYED, HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN });
+
+      // verify that noHdfsChecking reports the same errors
+      HBaseFsck fsck = new HBaseFsck(conf, hbfsckExecutorService);
+      fsck.connect();
+      HBaseFsck.setDisplayFullReport(); // i.e. -details
+      fsck.setTimeLag(0);
+      fsck.setCheckHdfs(false);
+      fsck.onlineHbck();
+      assertErrors(fsck,
+          new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.NOT_DEPLOYED, HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN });
+      fsck.close();
+
+      // verify that fixAssignments works fine with noHdfsChecking
+      fsck = new HBaseFsck(conf, hbfsckExecutorService);
+      fsck.connect();
+      HBaseFsck.setDisplayFullReport(); // i.e. -details
+      fsck.setTimeLag(0);
+      fsck.setCheckHdfs(false);
+      fsck.setFixAssignments(true);
+      fsck.onlineHbck();
+      assertTrue(fsck.shouldRerun());
+      fsck.onlineHbck();
+      assertNoErrors(fsck);
+
+      assertEquals(ROWKEYS.length, countRows());
+
+      fsck.close();
+    } finally {
+      cleanupTable(table);
+    }
+  }
+
+  /**
+   * Test that the -noHdfsChecking option can detect a region that is deployed but not in meta.
+   * However, it cannot fix that without checking HDFS, because the region info has to be read
+   * from HDFS in order to patch meta.
+   */
+  @Test (timeout=180000)
+  public void testFixMetaNotWorkingWithNoHdfsChecking() throws Exception {
+    TableName table =
+        TableName.valueOf("testFixMetaNotWorkingWithNoHdfsChecking");
+    try {
+      setupTable(table);
+      assertEquals(ROWKEYS.length, countRows());
+
+      // Mess it up by deleting a region from the metadata
+      deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"),
+          Bytes.toBytes("B"), false, true, false, false, HRegionInfo.DEFAULT_REPLICA_ID);
+
+      // verify there are no other errors
+      HBaseFsck hbck = doFsck(conf, false);
+      assertErrors(hbck,
+          new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META, HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN });
+
+      // verify that noHdfsChecking reports the same errors
+      HBaseFsck fsck = new HBaseFsck(conf, hbfsckExecutorService);
+      fsck.connect();
+      HBaseFsck.setDisplayFullReport(); // i.e. -details
+      fsck.setTimeLag(0);
+      fsck.setCheckHdfs(false);
+      fsck.onlineHbck();
+      assertErrors(fsck,
+          new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META, HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN });
+      fsck.close();
+
+      // verify that fixMeta doesn't work with noHdfsChecking
+      fsck = new HBaseFsck(conf, hbfsckExecutorService);
+      fsck.connect();
+      HBaseFsck.setDisplayFullReport(); // i.e. -details
+      fsck.setTimeLag(0);
+      fsck.setCheckHdfs(false);
+      fsck.setFixAssignments(true);
+      fsck.setFixMeta(true);
+      fsck.onlineHbck();
+      assertFalse(fsck.shouldRerun());
+      assertErrors(fsck,
+          new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META, HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN });
+      fsck.close();
+
+      // fix the cluster so other tests won't be impacted
+      fsck = doFsck(conf, true);
+      assertTrue(fsck.shouldRerun());
+      fsck = doFsck(conf, true);
+      assertNoErrors(fsck);
+    } finally {
+      cleanupTable(table);
+    }
+  }
+
+  /**
+   * Test that -fixHdfsHoles does not work with the -noHdfsChecking option,
+   * and that -noHdfsChecking cannot detect an orphan HDFS region.
+   */
+  @Test (timeout=180000)
+  public void testFixHdfsHolesNotWorkingWithNoHdfsChecking() throws Exception {
+    TableName table =
+        TableName.valueOf("testFixHdfsHolesNotWorkingWithNoHdfsChecking");
+    try {
+      setupTable(table);
+      assertEquals(ROWKEYS.length, countRows());
+
+      // Mess it up by creating an overlap in the metadata
+      admin.disableTable(table);
+      deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"), Bytes.toBytes("B"), true,
+          true, false, true, HRegionInfo.DEFAULT_REPLICA_ID);
+      admin.enableTable(table);
+
+      HRegionInfo hriOverlap =
+          createRegion(tbl.getTableDescriptor(), Bytes.toBytes("A2"), Bytes.toBytes("B"));
+      TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriOverlap);
+      TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
+          .waitForAssignment(hriOverlap);
+      ServerName server = regionStates.getRegionServerOfRegion(hriOverlap);
+      TEST_UTIL.assertRegionOnServer(hriOverlap, server, REGION_ONLINE_TIMEOUT);
+
+      HBaseFsck hbck = doFsck(conf, false);
+      assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
+          HBaseFsck.ErrorReporter.ERROR_CODE.ORPHAN_HDFS_REGION, HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
+          HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN});
+
+      // verify that noHdfsChecking can't detect ORPHAN_HDFS_REGION
+      HBaseFsck fsck = new HBaseFsck(conf, hbfsckExecutorService);
+      fsck.connect();
+      HBaseFsck.setDisplayFullReport(); // i.e. -details
+      fsck.setTimeLag(0);
+      fsck.setCheckHdfs(false);
+      fsck.onlineHbck();
+      assertErrors(fsck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN });
+      fsck.close();
+
+      // verify that fixHdfsHoles doesn't work with noHdfsChecking
+      fsck = new HBaseFsck(conf, hbfsckExecutorService);
+      fsck.connect();
+      HBaseFsck.setDisplayFullReport(); // i.e. -details
+      fsck.setTimeLag(0);
+      fsck.setCheckHdfs(false);
+      fsck.setFixHdfsHoles(true);
+      fsck.setFixHdfsOverlaps(true);
+      fsck.setFixHdfsOrphans(true);
+      fsck.onlineHbck();
+      assertFalse(fsck.shouldRerun());
+      assertErrors(fsck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN });
+      fsck.close();
+    } finally {
+      if (admin.isTableDisabled(table)) {
+        admin.enableTable(table);
+      }
+      cleanupTable(table);
+    }
+  }
+
+  /**
+   * This creates a table and then corrupts an hfile.  Hbck should quarantine the file.
+   */
+  @Test(timeout=180000)
+  public void testQuarantineCorruptHFile() throws Exception {
+    TableName table = TableName.valueOf(name.getMethodName());
+    try {
+      setupTable(table);
+      assertEquals(ROWKEYS.length, countRows());
+      admin.flush(table); // flush is async.
+
+      FileSystem fs = FileSystem.get(conf);
+      Path hfile = getFlushedHFile(fs, table);
+
+      // Mess it up by leaving a hole in the assignment, meta, and hdfs data
+      admin.disableTable(table);
+
+      // create new corrupt file called deadbeef (valid hfile name)
+      Path corrupt = new Path(hfile.getParent(), "deadbeef");
+      TestHFile.truncateFile(fs, hfile, corrupt);
+      LOG.info("Created corrupted file " + corrupt);
+      HBaseFsck.debugLsr(conf, FSUtils.getRootDir(conf));
+
+      // we cannot enable here because enable never finished due to the corrupt region.
+      HBaseFsck res = HbckTestingUtil.doHFileQuarantine(conf, table);
+      assertEquals(res.getRetCode(), 0);
+      HFileCorruptionChecker hfcc = res.getHFilecorruptionChecker();
+      assertEquals(hfcc.getHFilesChecked(), 5);
+      assertEquals(hfcc.getCorrupted().size(), 1);
+      assertEquals(hfcc.getFailures().size(), 0);
+      assertEquals(hfcc.getQuarantined().size(), 1);
+      assertEquals(hfcc.getMissing().size(), 0);
+
+      // It's been fixed; verify that we can enable the table.
+      admin.enableTable(table);
+    } finally {
+      cleanupTable(table);
+    }
+  }
+
+  /**
+   * This creates a table and simulates the race situation where a concurrent compaction or split
+   * has removed an hfile after the corruption checker learned about it.
+   */
+  @Test(timeout=180000)
+  public void testQuarantineMissingHFile() throws Exception {
+    TableName table = TableName.valueOf(name.getMethodName());
+
+    // inject a fault in the hfcc created.
+    final FileSystem fs = FileSystem.get(conf);
+    HBaseFsck hbck = new HBaseFsck(conf, hbfsckExecutorService) {
+      @Override
+      public HFileCorruptionChecker createHFileCorruptionChecker(boolean sidelineCorruptHFiles)
+          throws IOException {
+        return new HFileCorruptionChecker(conf, executor, sidelineCorruptHFiles) {
+          AtomicBoolean attemptedFirstHFile = new AtomicBoolean(false);
+          @Override
+          protected void checkHFile(Path p) throws IOException {
+            if (attemptedFirstHFile.compareAndSet(false, true)) {
+              assertTrue(fs.delete(p, true)); // make sure delete happened.
+            }
+            super.checkHFile(p);
+          }
+        };
+      }
+    };
+    doQuarantineTest(table, hbck, 4, 0, 0, 0, 1); // 4 attempted, but 1 missing.
+    hbck.close();
+  }
+
+  /**
+   * This creates and fixes a bad table with a region that has startkey == endkey
+   */
+  @Test (timeout=180000)
+  public void testDegenerateRegions() throws Exception {
+    TableName table = TableName.valueOf("tableDegenerateRegions");
+    try {
+      setupTable(table);
+      assertNoErrors(doFsck(conf, false));
+      assertEquals(ROWKEYS.length, countRows());
+
+      // Now let's mess it up, by adding a region with a duplicate startkey
+      HRegionInfo hriDupe =
+          createRegion(tbl.getTableDescriptor(), Bytes.toBytes("B"), Bytes.toBytes("B"));
+      TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriDupe);
+      TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
+          .waitForAssignment(hriDupe);
+      ServerName server = regionStates.getRegionServerOfRegion(hriDupe);
+      TEST_UTIL.assertRegionOnServer(hriDupe, server, REGION_ONLINE_TIMEOUT);
+
+      HBaseFsck hbck = doFsck(conf,false);
+      assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.DEGENERATE_REGION, HBaseFsck.ErrorReporter.ERROR_CODE.DUPE_STARTKEYS,
+          HBaseFsck.ErrorReporter.ERROR_CODE.DUPE_STARTKEYS });
+      assertEquals(2, hbck.getOverlapGroups(table).size());
+      assertEquals(ROWKEYS.length, countRows());
+
+      // fix the degenerate region.
+      doFsck(conf, true);
+
+      // check that the degenerate region is gone and no data loss
+      HBaseFsck hbck2 = doFsck(conf,false);
+      assertNoErrors(hbck2);
+      assertEquals(0, hbck2.getOverlapGroups(table).size());
+      assertEquals(ROWKEYS.length, countRows());
+    } finally {
+      cleanupTable(table);
+    }
+  }
+
+  /**
+   * Test missing REGIONINFO_QUALIFIER in hbase:meta
+   */
+  @Test (timeout=180000)
+  public void testMissingRegionInfoQualifier() throws Exception {
+    Connection connection = ConnectionFactory.createConnection(conf);
+    TableName table = TableName.valueOf("testMissingRegionInfoQualifier");
+    try {
+      setupTable(table);
+
+      // Mess it up by removing the RegionInfo for one region.
+      final List<Delete> deletes = new LinkedList<Delete>();
+      Table meta = connection.getTable(TableName.META_TABLE_NAME, hbfsckExecutorService);
+      MetaTableAccessor.fullScanRegions(connection, new MetaTableAccessor.Visitor() {
+
+        @Override
+        public boolean visit(Result rowResult) throws IOException {
+          HRegionInfo hri = MetaTableAccessor.getHRegionInfo(rowResult);
+          if (hri != null && !hri.getTable().isSystemTable()) {
+            Delete delete = new Delete(rowResult.getRow());
+            delete.addColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER);
+            deletes.add(delete);
+          }
+          return true;
+        }
+      });
+      meta.delete(deletes);
+
+      // Mess it up by creating a fake hbase:meta entry with no associated RegionInfo
+      meta.put(new Put(Bytes.toBytes(table + ",,1361911384013.810e28f59a57da91c66")).add(
+          HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER, Bytes.toBytes("node1:60020")));
+      meta.put(new Put(Bytes.toBytes(table + ",,1361911384013.810e28f59a57da91c66")).add(
+          HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER, Bytes.toBytes(1362150791183L)));
+      meta.close();
+
+      HBaseFsck hbck = doFsck(conf, false);
+      assertTrue(hbck.getErrors().getErrorList().contains(HBaseFsck.ErrorReporter.ERROR_CODE.EMPTY_META_CELL));
+
+      // fix the empty meta cells
+      hbck = doFsck(conf, true);
+
+      // check that the empty meta cells were fixed
+      assertFalse(hbck.getErrors().getErrorList().contains(HBaseFsck.ErrorReporter.ERROR_CODE.EMPTY_META_CELL));
+    } finally {
+      cleanupTable(table);
+    }
+    connection.close();
+  }
+
+  /**
+   * Test the pluggable error reporter. It can be plugged in
+   * via a system property or configuration.
+   */
+  @Test (timeout=180000)
+  public void testErrorReporter() throws Exception {
+    try {
+      MockErrorReporter.calledCount = 0;
+      doFsck(conf, false);
+      assertEquals(MockErrorReporter.calledCount, 0);
+
+      conf.set("hbasefsck.errorreporter", MockErrorReporter.class.getName());
+      doFsck(conf, false);
+      assertTrue(MockErrorReporter.calledCount > 20);
+    } finally {
+      conf.set("hbasefsck.errorreporter",
+          HBaseFsck.PrintingErrorReporter.class.getName());
+      MockErrorReporter.calledCount = 0;
+    }
+  }
+
+  @Test(timeout=60000)
+  public void testCheckTableLocks() throws Exception {
+    IncrementingEnvironmentEdge edge = new IncrementingEnvironmentEdge(0);
+    EnvironmentEdgeManager.injectEdge(edge);
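+    // Injecting an IncrementingEnvironmentEdge replaces the clock read through
+    // EnvironmentEdgeManager, so the test can make table locks look expired below
+    // without sleeping for the real lock expiration timeout.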
+    // check no errors
+    HBaseFsck hbck = doFsck(conf, false);
+    assertNoErrors(hbck);
+
+    ServerName mockName = ServerName.valueOf("localhost", 60000, 1);
+    final TableName tableName = TableName.valueOf("foo");
+
+    // obtain one lock
+    final TableLockManager tableLockManager =
+        TableLockManager.createTableLockManager(conf, TEST_UTIL.getZooKeeperWatcher(), mockName);
+    TableLockManager.TableLock
+        writeLock = tableLockManager.writeLock(tableName, "testCheckTableLocks");
+    writeLock.acquire();
+    hbck = doFsck(conf, false);
+    assertNoErrors(hbck); // should not have expired, no problems
+
+    edge.incrementTime(conf.getLong(TableLockManager.TABLE_LOCK_EXPIRE_TIMEOUT,
+        TableLockManager.DEFAULT_TABLE_LOCK_EXPIRE_TIMEOUT_MS)); // let table lock expire
+
+    hbck = doFsck(conf, false);
+    assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {HBaseFsck.ErrorReporter.ERROR_CODE.EXPIRED_TABLE_LOCK});
+
+    final CountDownLatch latch = new CountDownLatch(1);
+    new Thread() {
+      @Override
+      public void run() {
+        TableLockManager.TableLock
+            readLock = tableLockManager.writeLock(tableName, "testCheckTableLocks");
+        try {
+          latch.countDown();
+          readLock.acquire();
+        } catch (IOException ex) {
+          fail();
+        } catch (IllegalStateException ex) {
+          return; // expected, since this will be reaped under us.
+        }
+        fail("should not have come here");
+      };
+    }.start();
+
+    latch.await(); // wait until thread starts
+    Threads.sleep(300); // wait some more to ensure writeLock.acquire() is called
+
+    hbck = doFsck(conf, false);
+    assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
+        HBaseFsck.ErrorReporter.ERROR_CODE.EXPIRED_TABLE_LOCK}); // still one expired, one not-expired
+
+    edge.incrementTime(conf.getLong(TableLockManager.TABLE_LOCK_EXPIRE_TIMEOUT,
+        TableLockManager.DEFAULT_TABLE_LOCK_EXPIRE_TIMEOUT_MS)); // let table lock expire
+
+    hbck = doFsck(conf, false);
+    assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {HBaseFsck.ErrorReporter.ERROR_CODE.EXPIRED_TABLE_LOCK,
+        HBaseFsck.ErrorReporter.ERROR_CODE.EXPIRED_TABLE_LOCK}); // both are expired
+
+    conf.setLong(TableLockManager.TABLE_LOCK_EXPIRE_TIMEOUT, 1);
+    // reaping from ZKInterProcessWriteLock uses znode cTime,
+    // which is not injectable through EnvironmentEdge
+
+    Threads.sleep(10);
+    hbck = doFsck(conf, true); // now fix both cases
+
+    hbck = doFsck(conf, false);
+    assertNoErrors(hbck);
+
+    // ensure that locks are deleted
+    writeLock = tableLockManager.writeLock(tableName, "should acquire without blocking");
+    writeLock.acquire(); // this should not block.
+    writeLock.release(); // release for clean state
+    tableLockManager.tableDeleted(tableName);
+  }
+}

http://git-wip-us.apache.org/repos/asf/hbase/blob/fbd2ed2e/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsckReplicas.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsckReplicas.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsckReplicas.java
new file mode 100644
index 0000000..1b794ae
--- /dev/null
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsckReplicas.java
@@ -0,0 +1,257 @@
+package org.apache.hadoop.hbase.util;
+
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.MetaTableAccessor;
+import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.ClusterConnection;
+import org.apache.hadoop.hbase.client.Delete;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.RegionReplicaUtil;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
+import org.apache.hadoop.hbase.master.AssignmentManager;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.testclassification.MiscTests;
+import org.junit.AfterClass;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.NavigableMap;
+import java.util.Set;
+import java.util.concurrent.ScheduledThreadPoolExecutor;
+import java.util.concurrent.SynchronousQueue;
+import java.util.concurrent.ThreadPoolExecutor;
+import java.util.concurrent.TimeUnit;
+
+import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.*;
+import static org.junit.Assert.*;
+
+@Category({MiscTests.class, LargeTests.class})
+public class TestHBaseFsckReplicas extends BaseTestHBaseFsck {
+  @BeforeClass
+  public static void setUpBeforeClass() throws Exception {
+    TEST_UTIL.getConfiguration().set(CoprocessorHost.MASTER_COPROCESSOR_CONF_KEY,
+        MasterSyncObserver.class.getName());
+
+    conf.setInt("hbase.regionserver.handler.count", 2);
+    conf.setInt("hbase.regionserver.metahandler.count", 30);
+
+    conf.setInt("hbase.htable.threads.max", POOL_SIZE);
+    conf.setInt("hbase.hconnection.threads.max", 2 * POOL_SIZE);
+    conf.setInt("hbase.hconnection.threads.core", POOL_SIZE);
+    conf.setInt("hbase.hbck.close.timeout", 2 * REGION_ONLINE_TIMEOUT);
+    conf.setInt(HConstants.HBASE_RPC_TIMEOUT_KEY, 8 * REGION_ONLINE_TIMEOUT);
+    TEST_UTIL.startMiniCluster(3);
+
+    tableExecutorService = new ThreadPoolExecutor(1, POOL_SIZE, 60, TimeUnit.SECONDS,
+        new SynchronousQueue<Runnable>(), Threads.newDaemonThreadFactory("testhbck"));
+
+    hbfsckExecutorService = new ScheduledThreadPoolExecutor(POOL_SIZE);
+
+    AssignmentManager assignmentManager =
+        TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager();
+    regionStates = assignmentManager.getRegionStates();
+
+    connection = (ClusterConnection) TEST_UTIL.getConnection();
+
+    admin = connection.getAdmin();
+    admin.setBalancerRunning(false, true);
+
+    TEST_UTIL.waitUntilAllRegionsAssigned(TableName.META_TABLE_NAME);
+    TEST_UTIL.waitUntilAllRegionsAssigned(TableName.NAMESPACE_TABLE_NAME);
+  }
+
+  @AfterClass
+  public static void tearDownAfterClass() throws Exception {
+    tableExecutorService.shutdown();
+    hbfsckExecutorService.shutdown();
+    admin.close();
+    TEST_UTIL.shutdownMiniCluster();
+  }
+
+  @Before
+  public void setUp() {
+    EnvironmentEdgeManager.reset();
+  }
+
+  /**
+   * This creates a table with region_replica > 1 and verifies hbck runs
+   * successfully.
+   */
+  @Test(timeout=180000)
+  public void testHbckWithRegionReplica() throws Exception {
+    TableName table =
+        TableName.valueOf("testHbckWithRegionReplica");
+    try {
+      setupTableWithRegionReplica(table, 2);
+      admin.flush(table);
+      assertNoErrors(doFsck(conf, false));
+    } finally {
+      cleanupTable(table);
+    }
+  }
+
+  @Test (timeout=180000)
+  public void testHbckWithFewerReplica() throws Exception {
+    TableName table =
+        TableName.valueOf("testHbckWithFewerReplica");
+    try {
+      setupTableWithRegionReplica(table, 2);
+      admin.flush(table);
+      assertNoErrors(doFsck(conf, false));
+      assertEquals(ROWKEYS.length, countRows());
+      deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"), Bytes.toBytes("C"), true,
+          false, false, false, 1); // unassign one replica
+      // check that problem exists
+      HBaseFsck hbck = doFsck(conf, false);
+      assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.NOT_DEPLOYED });
+      // fix the problem
+      hbck = doFsck(conf, true);
+      // run hbck again to make sure we don't see any errors
+      hbck = doFsck(conf, false);
+      assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {});
+    } finally {
+      cleanupTable(table);
+    }
+  }
+
+  @Test (timeout=180000)
+  public void testHbckWithExcessReplica() throws Exception {
+    TableName table =
+        TableName.valueOf("testHbckWithExcessReplica");
+    try {
+      setupTableWithRegionReplica(table, 2);
+      admin.flush(table);
+      assertNoErrors(doFsck(conf, false));
+      assertEquals(ROWKEYS.length, countRows());
+      // the next few lines inject a location in meta for a replica, and then
+      // ask the master to assign the replica (the meta entry needs to be injected
+      // for the master to treat the assignment request as valid; the master
+      // checks that the region is valid either from its memory or from meta)
+      Table meta = connection.getTable(TableName.META_TABLE_NAME, tableExecutorService);
+      List<HRegionInfo> regions = admin.getTableRegions(table);
+      byte[] startKey = Bytes.toBytes("B");
+      byte[] endKey = Bytes.toBytes("C");
+      byte[] metaKey = null;
+      HRegionInfo newHri = null;
+      for (HRegionInfo h : regions) {
+        if (Bytes.compareTo(h.getStartKey(), startKey) == 0  &&
+            Bytes.compareTo(h.getEndKey(), endKey) == 0 &&
+            h.getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID) {
+          metaKey = h.getRegionName();
+          //create a hri with replicaId as 2 (since we already have replicas with replicaid 0 and 1)
+          newHri = RegionReplicaUtil.getRegionInfoForReplica(h, 2);
+          break;
+        }
+      }
+      Put put = new Put(metaKey);
+      Collection<ServerName> var = admin.getClusterStatus().getServers();
+      ServerName sn = var.toArray(new ServerName[var.size()])[0];
+      //add a location with replicaId as 2 (since we already have replicas with replicaid 0 and 1)
+      MetaTableAccessor.addLocation(put, sn, sn.getStartcode(), -1, 2);
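+      // the -1 is presumably a placeholder sequence number; the matching server/startcode/seqnum
+      // columns for replica 2 are deleted again a few lines below to restore the original meta row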
+      meta.put(put);
+      // assign the new replica
+      HBaseFsckRepair.fixUnassigned(admin, newHri);
+      HBaseFsckRepair.waitUntilAssigned(admin, newHri);
+      // now reset the meta row to its original value
+      Delete delete = new Delete(metaKey);
+      delete.addColumns(HConstants.CATALOG_FAMILY, MetaTableAccessor.getServerColumn(2));
+      delete.addColumns(HConstants.CATALOG_FAMILY, MetaTableAccessor.getStartCodeColumn(2));
+      delete.addColumns(HConstants.CATALOG_FAMILY, MetaTableAccessor.getSeqNumColumn(2));
+      meta.delete(delete);
+      meta.close();
+      // check that problem exists
+      HBaseFsck hbck = doFsck(conf, false);
+      assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[]{HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META});
+      // fix the problem
+      hbck = doFsck(conf, true);
+      // run hbck again to make sure we don't see any errors
+      hbck = doFsck(conf, false);
+      assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[]{});
+    } finally {
+      cleanupTable(table);
+    }
+  }
+
+  /**
+   * This creates and fixes a bad table with a region that is in meta but has
+   * no deployment or data hdfs. The table has region_replication set to 2.
+   */
+  @Test (timeout=180000)
+  public void testNotInHdfsWithReplicas() throws Exception {
+    TableName table =
+        TableName.valueOf("tableNotInHdfs");
+    try {
+      HRegionInfo[] oldHris = new HRegionInfo[2];
+      setupTableWithRegionReplica(table, 2);
+      assertEquals(ROWKEYS.length, countRows());
+      NavigableMap<HRegionInfo, ServerName> map =
+          MetaTableAccessor.allTableRegions(TEST_UTIL.getConnection(),
+              tbl.getName());
+      int i = 0;
+      // store the HRIs of the regions we will mess up
+      for (Map.Entry<HRegionInfo, ServerName> m : map.entrySet()) {
+        if (m.getKey().getStartKey().length > 0 &&
+            m.getKey().getStartKey()[0] == Bytes.toBytes("B")[0]) {
+          LOG.debug("Initially server hosting " + m.getKey() + " is " + m.getValue());
+          oldHris[i++] = m.getKey();
+        }
+      }
+      // make sure data in regions
+      admin.flush(table);
+
+      // Mess it up by leaving a hole in the hdfs data
+      deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"), Bytes.toBytes("C"), false,
+          false, true); // don't rm meta
+
+      HBaseFsck hbck = doFsck(conf, false);
+      assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_HDFS });
+
+      // fix hole
+      doFsck(conf, true);
+
+      // check that hole fixed
+      assertNoErrors(doFsck(conf, false));
+      assertEquals(ROWKEYS.length - 2, countRows());
+
+      // the following code checks whether the old primary/secondary has
+      // been unassigned and the new primary/secondary has been assigned
+      i = 0;
+      HRegionInfo[] newHris = new HRegionInfo[2];
+      // get all table's regions from meta
+      map = MetaTableAccessor.allTableRegions(TEST_UTIL.getConnection(), tbl.getName());
+      // get the HRIs of the new regions (hbck created new regions for fixing the hdfs mess-up)
+      for (Map.Entry<HRegionInfo, ServerName> m : map.entrySet()) {
+        if (m.getKey().getStartKey().length > 0 &&
+            m.getKey().getStartKey()[0] == Bytes.toBytes("B")[0]) {
+          newHris[i++] = m.getKey();
+        }
+      }
+      // get all the online regions in the regionservers
+      Collection<ServerName> servers = admin.getClusterStatus().getServers();
+      Set<HRegionInfo> onlineRegions = new HashSet<HRegionInfo>();
+      for (ServerName s : servers) {
+        List<HRegionInfo> list = admin.getOnlineRegions(s);
+        onlineRegions.addAll(list);
+      }
+      // the new HRIs must be a subset of the online regions
+      assertTrue(onlineRegions.containsAll(Arrays.asList(newHris)));
+      // the old HRIs must not be part of the set (removeAll would return false if
+      // the set didn't change)
+      assertFalse(onlineRegions.removeAll(Arrays.asList(oldHris)));
+    } finally {
+      cleanupTable(table);
+      admin.close();
+    }
+  }
+
+}