You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by st...@apache.org on 2017/12/15 20:19:57 UTC
[1/2] hbase git commit: HBASE-19272 Deal with HBCK tests disabled by
HBASE-14614 AMv2 when HBCK works again...
Repository: hbase
Updated Branches:
refs/heads/master 75f512bd7 -> 89e2869e2
http://git-wip-us.apache.org/repos/asf/hbase/blob/89e2869e/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsckTwoRS.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsckTwoRS.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsckTwoRS.java
deleted file mode 100644
index 6fa455a..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsckTwoRS.java
+++ /dev/null
@@ -1,674 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hbase.util;
-
-import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.assertErrors;
-import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.assertNoErrors;
-import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.doFsck;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertNotNull;
-import static org.junit.Assert.assertTrue;
-
-import java.util.concurrent.ScheduledThreadPoolExecutor;
-import java.util.concurrent.SynchronousQueue;
-import java.util.concurrent.ThreadPoolExecutor;
-import java.util.concurrent.TimeUnit;
-
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.HColumnDescriptor;
-import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.HRegionLocation;
-import org.apache.hadoop.hbase.HTableDescriptor;
-import org.apache.hadoop.hbase.MetaTableAccessor;
-import org.apache.hadoop.hbase.MiniHBaseCluster;
-import org.apache.hadoop.hbase.ServerName;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.client.ClusterConnection;
-import org.apache.hadoop.hbase.client.Durability;
-import org.apache.hadoop.hbase.client.HBaseAdmin;
-import org.apache.hadoop.hbase.client.Put;
-import org.apache.hadoop.hbase.client.RegionInfo;
-import org.apache.hadoop.hbase.client.RegionInfoBuilder;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.ResultScanner;
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.client.Table;
-import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
-import org.apache.hadoop.hbase.io.HFileLink;
-import org.apache.hadoop.hbase.io.hfile.HFile;
-import org.apache.hadoop.hbase.io.hfile.HFileContext;
-import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder;
-import org.apache.hadoop.hbase.master.RegionState;
-import org.apache.hadoop.hbase.regionserver.HRegionServer;
-import org.apache.hadoop.hbase.testclassification.LargeTests;
-import org.apache.hadoop.hbase.testclassification.MiscTests;
-import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
-import org.junit.AfterClass;
-import org.junit.Before;
-import org.junit.BeforeClass;
-import org.junit.Ignore;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-import org.junit.rules.TestName;
-
-import org.apache.hadoop.hbase.shaded.com.google.common.collect.Multimap;
-import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
-
-@Ignore // Until after HBASE-14614 goes in.
-@Category({MiscTests.class, LargeTests.class})
-public class TestHBaseFsckTwoRS extends BaseTestHBaseFsck {
- @Rule
- public TestName name = new TestName();
-
- @BeforeClass
- public static void setUpBeforeClass() throws Exception {
- TEST_UTIL.getConfiguration().set(CoprocessorHost.MASTER_COPROCESSOR_CONF_KEY,
- MasterSyncCoprocessor.class.getName());
-
- conf.setInt("hbase.regionserver.handler.count", 2);
- conf.setInt("hbase.regionserver.metahandler.count", 30);
-
- conf.setInt("hbase.htable.threads.max", POOL_SIZE);
- conf.setInt("hbase.hconnection.threads.max", 2 * POOL_SIZE);
- conf.setInt("hbase.hbck.close.timeout", 2 * REGION_ONLINE_TIMEOUT);
- conf.setInt(HConstants.HBASE_RPC_TIMEOUT_KEY, 8 * REGION_ONLINE_TIMEOUT);
- TEST_UTIL.startMiniCluster(2);
-
- tableExecutorService = new ThreadPoolExecutor(1, POOL_SIZE, 60, TimeUnit.SECONDS,
- new SynchronousQueue<>(), Threads.newDaemonThreadFactory("testhbck"));
-
- hbfsckExecutorService = new ScheduledThreadPoolExecutor(POOL_SIZE);
-
- assignmentManager = TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager();
- regionStates = assignmentManager.getRegionStates();
-
- connection = (ClusterConnection) TEST_UTIL.getConnection();
-
- admin = connection.getAdmin();
- admin.setBalancerRunning(false, true);
-
- TEST_UTIL.waitUntilAllRegionsAssigned(TableName.META_TABLE_NAME);
- TEST_UTIL.waitUntilAllRegionsAssigned(TableName.NAMESPACE_TABLE_NAME);
- }
-
- @AfterClass
- public static void tearDownAfterClass() throws Exception {
- tableExecutorService.shutdown();
- hbfsckExecutorService.shutdown();
- admin.close();
- TEST_UTIL.shutdownMiniCluster();
- }
-
- @Before
- public void setUp() {
- EnvironmentEdgeManager.reset();
- }
-
- @Test(timeout=180000)
- public void testFixAssignmentsWhenMETAinTransition() throws Exception {
- MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
- admin.unassign(RegionInfoBuilder.FIRST_META_REGIONINFO.getRegionName(), true);
- assignmentManager.offlineRegion(RegionInfoBuilder.FIRST_META_REGIONINFO);
- new MetaTableLocator().deleteMetaLocation(cluster.getMaster().getZooKeeper());
- assertFalse(regionStates.isRegionOnline(RegionInfoBuilder.FIRST_META_REGIONINFO));
- HBaseFsck hbck = doFsck(conf, true);
- assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.UNKNOWN, HBaseFsck.ErrorReporter.ERROR_CODE.NO_META_REGION,
- HBaseFsck.ErrorReporter.ERROR_CODE.NULL_META_REGION });
- assertNoErrors(doFsck(conf, false));
- }
-
- /**
- * This create and fixes a bad table with regions that have a duplicate
- * start key
- */
- @Test (timeout=180000)
- public void testDupeStartKey() throws Exception {
- final TableName tableName = TableName.valueOf(name.getMethodName());
- try {
- setupTable(tableName);
- assertNoErrors(doFsck(conf, false));
- assertEquals(ROWKEYS.length, countRows());
-
- // Now let's mess it up, by adding a region with a duplicate startkey
- RegionInfo hriDupe =
- createRegion(tbl.getTableDescriptor(), Bytes.toBytes("A"), Bytes.toBytes("A2"));
- TEST_UTIL.assignRegion(hriDupe);
-
- ServerName server = regionStates.getRegionServerOfRegion(hriDupe);
- TEST_UTIL.assertRegionOnServer(hriDupe, server, REGION_ONLINE_TIMEOUT);
-
- HBaseFsck hbck = doFsck(conf, false);
- assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.DUPE_STARTKEYS, HBaseFsck.ErrorReporter.ERROR_CODE.DUPE_STARTKEYS });
- assertEquals(2, hbck.getOverlapGroups(tableName).size());
- assertEquals(ROWKEYS.length, countRows()); // seems like the "bigger" region won.
-
- // fix the degenerate region.
- doFsck(conf, true);
-
- // check that the degenerate region is gone and no data loss
- HBaseFsck hbck2 = doFsck(conf,false);
- assertNoErrors(hbck2);
- assertEquals(0, hbck2.getOverlapGroups(tableName).size());
- assertEquals(ROWKEYS.length, countRows());
- } finally {
- cleanupTable(tableName);
- }
- }
-
- /**
- * This create and fixes a bad table with regions that have a duplicate
- * start key
- */
- @Test (timeout=180000)
- public void testDupeRegion() throws Exception {
- final TableName tableName = TableName.valueOf(name.getMethodName());
- try {
- setupTable(tableName);
- assertNoErrors(doFsck(conf, false));
- assertEquals(ROWKEYS.length, countRows());
-
- // Now let's mess it up, by adding a region with a duplicate startkey
- RegionInfo hriDupe =
- createRegion(tbl.getTableDescriptor(), Bytes.toBytes("A"), Bytes.toBytes("B"));
- TEST_UTIL.assignRegion(hriDupe);
-
- ServerName server = regionStates.getRegionServerOfRegion(hriDupe);
- TEST_UTIL.assertRegionOnServer(hriDupe, server, REGION_ONLINE_TIMEOUT);
-
- // Yikes! The assignment manager can't tell between diff between two
- // different regions with the same start/endkeys since it doesn't
- // differentiate on ts/regionId! We actually need to recheck
- // deployments!
- while (findDeployedHSI(getDeployedHRIs(admin), hriDupe) == null) {
- Thread.sleep(250);
- }
-
- LOG.debug("Finished assignment of dupe region");
-
- // TODO why is dupe region different from dupe start keys?
- HBaseFsck hbck = doFsck(conf, false);
- assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.DUPE_STARTKEYS, HBaseFsck.ErrorReporter.ERROR_CODE.DUPE_STARTKEYS });
- assertEquals(2, hbck.getOverlapGroups(tableName).size());
- assertEquals(ROWKEYS.length, countRows()); // seems like the "bigger" region won.
-
- // fix the degenerate region.
- doFsck(conf, true);
-
- // check that the degenerate region is gone and no data loss
- HBaseFsck hbck2 = doFsck(conf,false);
- assertNoErrors(hbck2);
- assertEquals(0, hbck2.getOverlapGroups(tableName).size());
- assertEquals(ROWKEYS.length, countRows());
- } finally {
- cleanupTable(tableName);
- }
- }
-
-
- /**
- * This creates and fixes a bad table where a region is completely contained
- * by another region.
- */
- @Test (timeout=180000)
- public void testContainedRegionOverlap() throws Exception {
- final TableName tableName = TableName.valueOf(name.getMethodName());
- try {
- setupTable(tableName);
- assertEquals(ROWKEYS.length, countRows());
-
- // Mess it up by creating an overlap in the metadata
- RegionInfo hriOverlap =
- createRegion(tbl.getTableDescriptor(), Bytes.toBytes("A2"), Bytes.toBytes("B"));
- TEST_UTIL.assignRegion(hriOverlap);
-
- ServerName server = regionStates.getRegionServerOfRegion(hriOverlap);
- TEST_UTIL.assertRegionOnServer(hriOverlap, server, REGION_ONLINE_TIMEOUT);
-
- HBaseFsck hbck = doFsck(conf, false);
- assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.OVERLAP_IN_REGION_CHAIN });
- assertEquals(2, hbck.getOverlapGroups(tableName).size());
- assertEquals(ROWKEYS.length, countRows());
-
- // fix the problem.
- doFsck(conf, true);
-
- // verify that overlaps are fixed
- HBaseFsck hbck2 = doFsck(conf,false);
- assertNoErrors(hbck2);
- assertEquals(0, hbck2.getOverlapGroups(tableName).size());
- assertEquals(ROWKEYS.length, countRows());
- } finally {
- cleanupTable(tableName);
- }
- }
-
- /**
- * Test fixing lingering reference file.
- */
- @Test (timeout=180000)
- public void testLingeringReferenceFile() throws Exception {
- final TableName tableName = TableName.valueOf(name.getMethodName());
- try {
- setupTable(tableName);
- assertEquals(ROWKEYS.length, countRows());
-
- // Mess it up by creating a fake reference file
- FileSystem fs = FileSystem.get(conf);
- Path tableDir= FSUtils.getTableDir(FSUtils.getRootDir(conf), tableName);
- Path regionDir = FSUtils.getRegionDirs(fs, tableDir).get(0);
- Path famDir = new Path(regionDir, FAM_STR);
- Path fakeReferenceFile = new Path(famDir, "fbce357483ceea.12144538");
- fs.create(fakeReferenceFile);
-
- HBaseFsck hbck = doFsck(conf, false);
- assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.LINGERING_REFERENCE_HFILE });
- // fix reference file
- doFsck(conf, true);
- // check that reference file fixed
- assertNoErrors(doFsck(conf, false));
- } finally {
- cleanupTable(tableName);
- }
- }
-
- /**
- * Test fixing lingering HFileLinks.
- */
- @Test(timeout = 180000)
- public void testLingeringHFileLinks() throws Exception {
- final TableName tableName = TableName.valueOf(name.getMethodName());
- try {
- setupTable(tableName);
-
- FileSystem fs = FileSystem.get(conf);
- Path tableDir = FSUtils.getTableDir(FSUtils.getRootDir(conf), tableName);
- Path regionDir = FSUtils.getRegionDirs(fs, tableDir).get(0);
- String regionName = regionDir.getName();
- Path famDir = new Path(regionDir, FAM_STR);
- String HFILE_NAME = "01234567abcd";
- Path hFilePath = new Path(famDir, HFILE_NAME);
-
- // creating HFile
- HFileContext context = new HFileContextBuilder().withIncludesTags(false).build();
- HFile.Writer w =
- HFile.getWriterFactoryNoCache(conf).withPath(fs, hFilePath).withFileContext(context)
- .create();
- w.close();
-
- HFileLink.create(conf, fs, famDir, tableName, regionName, HFILE_NAME);
-
- // should report no error
- HBaseFsck hbck = doFsck(conf, false);
- assertNoErrors(hbck);
-
- // Delete linked file
- fs.delete(hFilePath, true);
-
- // Check without fix should show the error
- hbck = doFsck(conf, false);
- assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
- HBaseFsck.ErrorReporter.ERROR_CODE.LINGERING_HFILELINK });
-
- // Fixing the error
- hbck = doFsck(conf, true);
- assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
- HBaseFsck.ErrorReporter.ERROR_CODE.LINGERING_HFILELINK });
-
- // Fix should sideline these files, thus preventing the error
- hbck = doFsck(conf, false);
- assertNoErrors(hbck);
- } finally {
- cleanupTable(tableName);
- }
- }
-
- @Test(timeout = 180000)
- public void testCorruptLinkDirectory() throws Exception {
- final TableName tableName = TableName.valueOf(name.getMethodName());
- try {
- setupTable(tableName);
- FileSystem fs = FileSystem.get(conf);
-
- Path tableDir = FSUtils.getTableDir(FSUtils.getRootDir(conf), tableName);
- Path regionDir = FSUtils.getRegionDirs(fs, tableDir).get(0);
- Path famDir = new Path(regionDir, FAM_STR);
- String regionName = regionDir.getName();
- String HFILE_NAME = "01234567abcd";
- String link = HFileLink.createHFileLinkName(tableName, regionName, HFILE_NAME);
-
- // should report no error
- HBaseFsck hbck = doFsck(conf, false);
- assertNoErrors(hbck);
-
- // creating a directory with file instead of the HFileLink file
- fs.mkdirs(new Path(famDir, link));
- fs.create(new Path(new Path(famDir, link), "somefile"));
-
- // Check without fix should show the error
- hbck = doFsck(conf, false);
- assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
- HBaseFsck.ErrorReporter.ERROR_CODE.LINGERING_HFILELINK });
-
- // Fixing the error
- hbck = doFsck(conf, true);
- assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
- HBaseFsck.ErrorReporter.ERROR_CODE.LINGERING_HFILELINK });
-
- // Fix should sideline these files, thus preventing the error
- hbck = doFsck(conf, false);
- assertNoErrors(hbck);
- } finally {
- cleanupTable(tableName);
- }
- }
-
- @Test (timeout=180000)
- public void testMetaOffline() throws Exception {
- // check no errors
- HBaseFsck hbck = doFsck(conf, false);
- assertNoErrors(hbck);
- deleteMetaRegion(conf, true, false, false);
- hbck = doFsck(conf, false);
- // ERROR_CODE.UNKNOWN is coming because we reportError with a message for the hbase:meta
- // inconsistency and whether we will be fixing it or not.
- assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.NO_META_REGION, HBaseFsck.ErrorReporter.ERROR_CODE.UNKNOWN });
- hbck = doFsck(conf, true);
- assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.NO_META_REGION, HBaseFsck.ErrorReporter.ERROR_CODE.UNKNOWN });
- hbck = doFsck(conf, false);
- assertNoErrors(hbck);
- }
-
- /**
- * This creates and fixes a bad table where an overlap group of
- * 3 regions. Set HBaseFsck.maxMerge to 2 to trigger sideline overlapped
- * region. Mess around the meta data so that closeRegion/offlineRegion
- * throws exceptions.
- */
- @Test (timeout=180000)
- public void testSidelineOverlapRegion() throws Exception {
- final TableName tableName = TableName.valueOf(name.getMethodName());
- try {
- setupTable(tableName);
- assertEquals(ROWKEYS.length, countRows());
-
- // Mess it up by creating an overlap
- MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
- RegionInfo hriOverlap1 =
- createRegion(tbl.getTableDescriptor(), Bytes.toBytes("A"), Bytes.toBytes("AB"));
- TEST_UTIL.assignRegion(hriOverlap1);
-
- RegionInfo hriOverlap2 =
- createRegion(tbl.getTableDescriptor(), Bytes.toBytes("AB"), Bytes.toBytes("B"));
- TEST_UTIL.assignRegion(hriOverlap2);
-
- HBaseFsck hbck = doFsck(conf, false);
- assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {HBaseFsck.ErrorReporter.ERROR_CODE.DUPE_STARTKEYS,
- HBaseFsck.ErrorReporter.ERROR_CODE.DUPE_STARTKEYS, HBaseFsck.ErrorReporter.ERROR_CODE.OVERLAP_IN_REGION_CHAIN});
- assertEquals(3, hbck.getOverlapGroups(tableName).size());
- assertEquals(ROWKEYS.length, countRows());
-
- // mess around the overlapped regions, to trigger NotServingRegionException
- Multimap<byte[], HBaseFsck.HbckInfo> overlapGroups = hbck.getOverlapGroups(tableName);
- ServerName serverName = null;
- byte[] regionName = null;
- for (HBaseFsck.HbckInfo hbi: overlapGroups.values()) {
- if ("A".equals(Bytes.toString(hbi.getStartKey()))
- && "B".equals(Bytes.toString(hbi.getEndKey()))) {
- regionName = hbi.getRegionName();
-
- // get an RS not serving the region to force bad assignment info in to META.
- int k = cluster.getServerWith(regionName);
- for (int i = 0; i < 3; i++) {
- if (i != k) {
- HRegionServer rs = cluster.getRegionServer(i);
- serverName = rs.getServerName();
- break;
- }
- }
-
- HBaseFsckRepair.closeRegionSilentlyAndWait(connection,
- cluster.getRegionServer(k).getServerName(), hbi.getHdfsHRI());
- admin.offline(regionName);
- break;
- }
- }
-
- assertNotNull(regionName);
- assertNotNull(serverName);
- try (Table meta = connection.getTable(TableName.META_TABLE_NAME, tableExecutorService)) {
- Put put = new Put(regionName);
- put.addColumn(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER,
- Bytes.toBytes(serverName.getAddress().toString()));
- meta.put(put);
- }
-
- // fix the problem.
- HBaseFsck fsck = new HBaseFsck(conf, hbfsckExecutorService);
- fsck.connect();
- HBaseFsck.setDisplayFullReport(); // i.e. -details
- fsck.setTimeLag(0);
- fsck.setFixAssignments(true);
- fsck.setFixMeta(true);
- fsck.setFixHdfsHoles(true);
- fsck.setFixHdfsOverlaps(true);
- fsck.setFixHdfsOrphans(true);
- fsck.setFixVersionFile(true);
- fsck.setSidelineBigOverlaps(true);
- fsck.setMaxMerge(2);
- fsck.onlineHbck();
- fsck.close();
-
- // verify that overlaps are fixed, and there are less rows
- // since one region is sidelined.
- HBaseFsck hbck2 = doFsck(conf,false);
- assertNoErrors(hbck2);
- assertEquals(0, hbck2.getOverlapGroups(tableName).size());
- assertTrue(ROWKEYS.length > countRows());
- } finally {
- cleanupTable(tableName);
- }
- }
-
- @Test(timeout=180000)
- public void testHBaseFsck() throws Exception {
- assertNoErrors(doFsck(conf, false));
- final TableName tableName = TableName.valueOf(name.getMethodName());
- HTableDescriptor desc = new HTableDescriptor(tableName);
- HColumnDescriptor hcd = new HColumnDescriptor(Bytes.toString(FAM));
- desc.addFamily(hcd); // If a tableName has no CF's it doesn't get checked
- createTable(TEST_UTIL, desc, null);
-
- // We created 1 table, should be fine
- assertNoErrors(doFsck(conf, false));
-
- // Now let's mess it up and change the assignment in hbase:meta to
- // point to a different region server
- Table meta = connection.getTable(TableName.META_TABLE_NAME, tableExecutorService);
- Scan scan = new Scan();
- scan.setStartRow(Bytes.toBytes(tableName+",,"));
- ResultScanner scanner = meta.getScanner(scan);
- RegionInfo hri = null;
-
- Result res = scanner.next();
- ServerName currServer =
- ProtobufUtil.parseServerNameFrom(res.getValue(HConstants.CATALOG_FAMILY,
- HConstants.SERVER_QUALIFIER));
- long startCode = Bytes.toLong(res.getValue(HConstants.CATALOG_FAMILY,
- HConstants.STARTCODE_QUALIFIER));
-
- for (JVMClusterUtil.RegionServerThread rs :
- TEST_UTIL.getHBaseCluster().getRegionServerThreads()) {
-
- ServerName sn = rs.getRegionServer().getServerName();
-
- // When we find a diff RS, change the assignment and break
- if (!currServer.getHostAndPort().equals(sn.getHostAndPort()) ||
- startCode != sn.getStartcode()) {
- Put put = new Put(res.getRow());
- put.setDurability(Durability.SKIP_WAL);
- put.addColumn(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER,
- Bytes.toBytes(sn.getHostAndPort()));
- put.addColumn(HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER,
- Bytes.toBytes(sn.getStartcode()));
- meta.put(put);
- hri = MetaTableAccessor.getRegionInfo(res);
- break;
- }
- }
-
- // Try to fix the data
- assertErrors(doFsck(conf, true), new HBaseFsck.ErrorReporter.ERROR_CODE[]{
- HBaseFsck.ErrorReporter.ERROR_CODE.SERVER_DOES_NOT_MATCH_META});
-
- TEST_UTIL.getHBaseCluster().getMaster()
- .getAssignmentManager().waitForAssignment(hri);
-
- // Should be fixed now
- assertNoErrors(doFsck(conf, false));
-
- // comment needed - what is the purpose of this line
- Table t = connection.getTable(tableName, tableExecutorService);
- ResultScanner s = t.getScanner(new Scan());
- s.close();
- t.close();
-
- scanner.close();
- meta.close();
- }
-
- /**
- * This creates and fixes a bad table with a missing region -- hole in meta and data present but
- * .regioninfo missing (an orphan hdfs region)in the fs. At last we check every row was present
- * at the correct region.
- */
- @Test(timeout = 180000)
- public void testHDFSRegioninfoMissingAndCheckRegionBoundary() throws Exception {
- final TableName tableName = TableName.valueOf(name.getMethodName());
- try {
- setupTable(tableName);
- assertEquals(ROWKEYS.length, countRows());
-
- // Mess it up by leaving a hole in the meta data
- admin.disableTable(tableName);
- deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"), Bytes.toBytes("C"), true,
- true, false, true, RegionInfo.DEFAULT_REPLICA_ID);
- admin.enableTable(tableName);
-
- HBaseFsck hbck = doFsck(conf, false);
- assertErrors(hbck,
- new HBaseFsck.ErrorReporter.ERROR_CODE[] {
- HBaseFsck.ErrorReporter.ERROR_CODE.ORPHAN_HDFS_REGION,
- HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
- HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN });
- // holes are separate from overlap groups
- assertEquals(0, hbck.getOverlapGroups(tableName).size());
-
- // fix hole
- doFsck(conf, true);
-
- // check that hole fixed
- assertNoErrors(doFsck(conf, false));
-
- // check data belong to the correct region,every scan should get one row.
- for (int i = 0; i < ROWKEYS.length; i++) {
- if (i != ROWKEYS.length - 1) {
- assertEquals(1, countRows(ROWKEYS[i], ROWKEYS[i + 1]));
- } else {
- assertEquals(1, countRows(ROWKEYS[i], null));
- }
- }
-
- } finally {
- cleanupTable(tableName);
- }
- }
-
- /**
- * Creates and fixes a bad table with a successful split that have a deployed
- * start and end keys
- */
- @Test (timeout=180000)
- public void testSplitAndDupeRegion() throws Exception {
- TableName table =
- TableName.valueOf("testSplitAndDupeRegion");
- Table meta = null;
-
- try {
- setupTable(table);
-
- assertNoErrors(doFsck(conf, false));
- assertEquals(ROWKEYS.length, countRows());
-
- // No Catalog Janitor running
- admin.enableCatalogJanitor(false);
- meta = connection.getTable(TableName.META_TABLE_NAME, tableExecutorService);
- HRegionLocation loc = this.connection.getRegionLocation(table, SPLITS[0], false);
- RegionInfo hriParent = loc.getRegionInfo();
-
- // Split Region A just before B
- this.connection.getAdmin().split(table, Bytes.toBytes("A@"));
- Thread.sleep(1000);
-
- // We need to make sure the parent region is not in a split state, so we put it in CLOSED state.
- regionStates.updateRegionState(hriParent, RegionState.State.CLOSED);
- TEST_UTIL.assignRegion(hriParent);
- MetaTableAccessor.addRegionToMeta(meta, hriParent);
- ServerName server = regionStates.getRegionServerOfRegion(hriParent);
-
- if (server != null)
- TEST_UTIL.assertRegionOnServer(hriParent, server, REGION_ONLINE_TIMEOUT);
-
- while (findDeployedHSI(getDeployedHRIs((HBaseAdmin) admin), hriParent) == null) {
- Thread.sleep(250);
- }
-
- LOG.debug("Finished assignment of parent region");
-
- // TODO why is dupe region different from dupe start keys?
- HBaseFsck hbck = doFsck(conf, false);
- assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.DUPE_STARTKEYS,
- HBaseFsck.ErrorReporter.ERROR_CODE.DUPE_STARTKEYS, HBaseFsck.ErrorReporter.ERROR_CODE.OVERLAP_IN_REGION_CHAIN});
- assertEquals(3, hbck.getOverlapGroups(table).size());
-
- // fix the degenerate region.
- hbck = new HBaseFsck(conf, hbfsckExecutorService);
- hbck.setDisplayFullReport(); // i.e. -details
- hbck.setTimeLag(0);
- hbck.setFixHdfsOverlaps(true);
- hbck.setRemoveParents(true);
- hbck.setFixReferenceFiles(true);
- hbck.setFixHFileLinks(true);
- hbck.connect();
- hbck.onlineHbck();
- hbck.close();
-
- hbck = doFsck(conf, false);
-
- assertNoErrors(hbck);
- assertEquals(0, hbck.getOverlapGroups(table).size());
- assertEquals(ROWKEYS.length, countRows());
- } finally {
- cleanupTable(table);
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/hbase/blob/89e2869e/hbase-server/src/test/java/org/apache/hadoop/hbase/util/hbck/TestOfflineMetaRebuildBase.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/hbck/TestOfflineMetaRebuildBase.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/hbck/TestOfflineMetaRebuildBase.java
deleted file mode 100644
index 1560efe..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/hbck/TestOfflineMetaRebuildBase.java
+++ /dev/null
@@ -1,126 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.util.hbck;
-
-import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.assertErrors;
-import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.doFsck;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNotNull;
-import static org.junit.Assert.assertTrue;
-
-import java.io.IOException;
-import java.util.Arrays;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.HTableDescriptor;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.client.Admin;
-import org.apache.hadoop.hbase.client.Connection;
-import org.apache.hadoop.hbase.client.ConnectionFactory;
-import org.apache.hadoop.hbase.testclassification.MediumTests;
-import org.apache.hadoop.hbase.testclassification.MiscTests;
-import org.apache.hadoop.hbase.util.FSUtils;
-import org.apache.hadoop.hbase.util.HBaseFsck;
-import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter.ERROR_CODE;
-import org.junit.Ignore;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-/**
- * This builds a table, removes info from meta, and then rebuilds meta.
- */
-@Category({MiscTests.class, MediumTests.class})
-public class TestOfflineMetaRebuildBase extends OfflineMetaRebuildTestCore {
- private static final Log LOG = LogFactory.getLog(TestOfflineMetaRebuildBase.class);
-
- @SuppressWarnings("deprecation")
- @Ignore @Test(timeout = 120000) // To fix post HBASE-14614
- public void testMetaRebuild() throws Exception {
- wipeOutMeta();
-
- // is meta really messed up?
- assertEquals(1, scanMeta());
- assertErrors(doFsck(conf, false),
- new ERROR_CODE[] {
- ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
- ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
- ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
- ERROR_CODE.NOT_IN_META_OR_DEPLOYED});
- // Note, would like to check # of tables, but this takes a while to time
- // out.
-
- // shutdown the minicluster
- TEST_UTIL.shutdownMiniHBaseCluster();
- TEST_UTIL.shutdownMiniZKCluster();
-
- // rebuild meta table from scratch
- HBaseFsck fsck = new HBaseFsck(conf);
- assertTrue(fsck.rebuildMeta(false));
- assertTrue("HBCK meta recovery WAL directory exist.", validateHBCKMetaRecoveryWALDir());
-
- // bring up the minicluster
- TEST_UTIL.startMiniZKCluster();
- TEST_UTIL.restartHBaseCluster(3);
- try (Connection connection = ConnectionFactory.createConnection(TEST_UTIL.getConfiguration())) {
- Admin admin = connection.getAdmin();
- if (admin.isTableDisabled(table))
- admin.enableTable(table);
- LOG.info("Waiting for no more RIT");
- TEST_UTIL.waitUntilNoRegionsInTransition(60000);
- LOG.info("No more RIT in ZK, now doing final test verification");
-
- // everything is good again.
- assertEquals(5, scanMeta()); // including table state rows
- TableName[] tableNames = TEST_UTIL.getAdmin().listTableNames();
- for (TableName tableName : tableNames) {
- HTableDescriptor tableDescriptor = TEST_UTIL.getAdmin().getTableDescriptor(tableName);
- assertNotNull(tableDescriptor);
- assertTrue(TEST_UTIL.getAdmin().isTableEnabled(tableName));
- }
- HTableDescriptor[] htbls = admin.listTables();
- LOG.info("Tables present after restart: " + Arrays.toString(htbls));
- assertEquals(1, htbls.length);
- }
-
- assertErrors(doFsck(conf, false), new ERROR_CODE[] {});
- LOG.info("Table " + table + " has " + tableRowCount(conf, table) + " entries.");
- assertEquals(16, tableRowCount(conf, table));
- }
-
- /**
- * Validate whether Meta recovery empty WAL directory is removed.
- * @return True if directory is removed otherwise false.
- */
- private boolean validateHBCKMetaRecoveryWALDir() throws IOException {
- Path rootdir = FSUtils.getRootDir(TEST_UTIL.getConfiguration());
- Path walLogDir = new Path(rootdir, HConstants.HREGION_LOGDIR_NAME);
- FileSystem fs = TEST_UTIL.getTestFileSystem();
- FileStatus[] walFiles = FSUtils.listStatus(fs, walLogDir, null);
- assertNotNull(walFiles);
- for (FileStatus fsStat : walFiles) {
- if (fsStat.isDirectory() && fsStat.getPath().getName().startsWith("hbck-meta-recovery-")) {
- return false;
- }
- }
- return true;
- }
-}
http://git-wip-us.apache.org/repos/asf/hbase/blob/89e2869e/hbase-server/src/test/java/org/apache/hadoop/hbase/util/hbck/TestOfflineMetaRebuildHole.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/hbck/TestOfflineMetaRebuildHole.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/hbck/TestOfflineMetaRebuildHole.java
deleted file mode 100644
index 60c4b25..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/hbck/TestOfflineMetaRebuildHole.java
+++ /dev/null
@@ -1,94 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.util.hbck;
-
-import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.assertErrors;
-import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.doFsck;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-
-import java.util.Arrays;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hbase.HTableDescriptor;
-import org.apache.hadoop.hbase.testclassification.MediumTests;
-import org.apache.hadoop.hbase.testclassification.MiscTests;
-import org.apache.hadoop.hbase.util.HBaseFsck;
-import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter.ERROR_CODE;
-import org.junit.Ignore;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-
-/**
- * This builds a table, removes info from meta, and then fails when attempting
- * to rebuild meta.
- */
-@Ignore
-@Category({MiscTests.class, MediumTests.class})
-public class TestOfflineMetaRebuildHole extends OfflineMetaRebuildTestCore {
- private final static Log LOG = LogFactory.getLog(TestOfflineMetaRebuildHole.class);
-
- @Test(timeout = 120000)
- public void testMetaRebuildHoleFail() throws Exception {
- // Fully remove a meta entry and hdfs region
- byte[] startKey = splits[1];
- byte[] endKey = splits[2];
- deleteRegion(conf, htbl, startKey, endKey);
-
- wipeOutMeta();
-
- // is meta really messed up?
- assertEquals(1, scanMeta());
- assertErrors(doFsck(conf, false), new ERROR_CODE[] {
- ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
- ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
- ERROR_CODE.NOT_IN_META_OR_DEPLOYED});
- // Note, would like to check # of tables, but this takes a while to time
- // out.
-
- // shutdown the minicluster
- TEST_UTIL.shutdownMiniHBaseCluster();
- TEST_UTIL.shutdownMiniZKCluster();
-
- // attempt to rebuild meta table from scratch
- HBaseFsck fsck = new HBaseFsck(conf);
- assertFalse(fsck.rebuildMeta(false));
- fsck.close();
-
- // bring up the minicluster
- TEST_UTIL.startMiniZKCluster(); // tables seem enabled by default
- TEST_UTIL.restartHBaseCluster(3);
-
- LOG.info("Waiting for no more RIT");
- TEST_UTIL.waitUntilNoRegionsInTransition(60000);
-
- // Meta still messed up.
- assertEquals(1, scanMeta());
- HTableDescriptor[] htbls = getTables(TEST_UTIL.getConfiguration());
- LOG.info("Tables present after restart: " + Arrays.toString(htbls));
-
- // After HBASE-451 HBaseAdmin.listTables() gets table descriptors from FS,
- // so the table is still present and this should be 1.
- assertEquals(1, htbls.length);
- assertErrors(doFsck(conf, false), new ERROR_CODE[] {
- ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
- ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
- ERROR_CODE.NOT_IN_META_OR_DEPLOYED});
- }
-}
http://git-wip-us.apache.org/repos/asf/hbase/blob/89e2869e/hbase-server/src/test/java/org/apache/hadoop/hbase/util/hbck/TestOfflineMetaRebuildOverlap.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/hbck/TestOfflineMetaRebuildOverlap.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/hbck/TestOfflineMetaRebuildOverlap.java
deleted file mode 100644
index 9bf2212..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/hbck/TestOfflineMetaRebuildOverlap.java
+++ /dev/null
@@ -1,105 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.util.hbck;
-
-import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.assertErrors;
-import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.doFsck;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-
-import java.util.Arrays;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hbase.HTableDescriptor;
-import org.apache.hadoop.hbase.testclassification.MediumTests;
-import org.apache.hadoop.hbase.testclassification.MiscTests;
-import org.apache.hadoop.hbase.util.HBaseFsck;
-import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter.ERROR_CODE;
-import org.apache.hadoop.hbase.util.HBaseFsck.HbckInfo;
-import org.junit.Ignore;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-
-import org.apache.hadoop.hbase.shaded.com.google.common.collect.Multimap;
-
-/**
- * This builds a table, builds an overlap, and then fails when attempting to
- * rebuild meta.
- */
-@Ignore
-@Category({MiscTests.class, MediumTests.class})
-public class TestOfflineMetaRebuildOverlap extends OfflineMetaRebuildTestCore {
- private final static Log LOG = LogFactory.getLog(TestOfflineMetaRebuildOverlap.class);
-
- @Test(timeout = 120000)
- public void testMetaRebuildOverlapFail() throws Exception {
- // Add a new .regioninfo meta entry in hdfs
- byte[] startKey = splits[0];
- byte[] endKey = splits[2];
- createRegion(conf, htbl, startKey, endKey);
-
- wipeOutMeta();
-
- // is meta really messed up?
- assertEquals(1, scanMeta());
- assertErrors(doFsck(conf, false),
- new ERROR_CODE[] {
- ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
- ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
- ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
- ERROR_CODE.NOT_IN_META_OR_DEPLOYED});
- // Note, would like to check # of tables, but this takes a while to time
- // out.
-
- // shutdown the minicluster
- TEST_UTIL.shutdownMiniHBaseCluster();
- TEST_UTIL.shutdownMiniZKCluster();
-
- // attempt to rebuild meta table from scratch
- HBaseFsck fsck = new HBaseFsck(conf);
- assertFalse(fsck.rebuildMeta(false));
-
- Multimap<byte[], HbckInfo> problems = fsck.getOverlapGroups(table);
- assertEquals(1, problems.keySet().size());
- assertEquals(3, problems.size());
-
- // bring up the minicluster
- TEST_UTIL.startMiniZKCluster(); // tables seem enabled by default
- TEST_UTIL.restartHBaseCluster(3);
-
- LOG.info("Waiting for no more RIT");
- TEST_UTIL.waitUntilNoRegionsInTransition(60000);
- LOG.info("No more RIT in ZK, now doing final test verification");
-
- // Meta still messed up.
- assertEquals(1, scanMeta());
- HTableDescriptor[] htbls = getTables(TEST_UTIL.getConfiguration());
- LOG.info("Tables present after restart: " + Arrays.toString(htbls));
-
- // After HBASE-451 HBaseAdmin.listTables() gets table descriptors from FS,
- // so the table is still present and this should be 1.
- assertEquals(1, htbls.length);
- assertErrors(doFsck(conf, false),
- new ERROR_CODE[] {
- ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
- ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
- ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
- ERROR_CODE.NOT_IN_META_OR_DEPLOYED});
- }
-}
[2/2] hbase git commit: HBASE-19272 Deal with HBCK tests disabled by
HBASE-14614 AMv2 when HBCK works again...
Posted by st...@apache.org.
HBASE-19272 Deal with HBCK tests disabled by HBASE-14614 AMv2 when HBCK
works again...
I removed them
Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/89e2869e
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/89e2869e
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/89e2869e
Branch: refs/heads/master
Commit: 89e2869e225d204b6b19df5b0aee003a61fd10d4
Parents: 75f512b
Author: Michael Stack <st...@apache.org>
Authored: Fri Dec 15 12:18:49 2017 -0800
Committer: Michael Stack <st...@apache.org>
Committed: Fri Dec 15 12:19:49 2017 -0800
----------------------------------------------------------------------
.../hadoop/hbase/util/TestHBaseFsckOneRS.java | 1741 ------------------
.../hbase/util/TestHBaseFsckReplicas.java | 380 ----
.../hadoop/hbase/util/TestHBaseFsckTwoRS.java | 674 -------
.../util/hbck/TestOfflineMetaRebuildBase.java | 126 --
.../util/hbck/TestOfflineMetaRebuildHole.java | 94 -
.../hbck/TestOfflineMetaRebuildOverlap.java | 105 --
6 files changed, 3120 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hbase/blob/89e2869e/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsckOneRS.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsckOneRS.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsckOneRS.java
deleted file mode 100644
index ba3475e..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsckOneRS.java
+++ /dev/null
@@ -1,1741 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hbase.util;
-
-import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.assertErrors;
-import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.assertNoErrors;
-import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.doFsck;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertNotEquals;
-import static org.junit.Assert.assertNotNull;
-import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.fail;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.LinkedList;
-import java.util.List;
-import java.util.concurrent.Callable;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
-import java.util.concurrent.Future;
-import java.util.concurrent.ScheduledThreadPoolExecutor;
-import java.util.concurrent.SynchronousQueue;
-import java.util.concurrent.ThreadPoolExecutor;
-import java.util.concurrent.TimeUnit;
-import java.util.concurrent.atomic.AtomicBoolean;
-
-import org.apache.commons.io.IOUtils;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.HColumnDescriptor;
-import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.HRegionLocation;
-import org.apache.hadoop.hbase.HTableDescriptor;
-import org.apache.hadoop.hbase.MetaTableAccessor;
-import org.apache.hadoop.hbase.MiniHBaseCluster;
-import org.apache.hadoop.hbase.ServerName;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.client.ClusterConnection;
-import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
-import org.apache.hadoop.hbase.client.Connection;
-import org.apache.hadoop.hbase.client.ConnectionFactory;
-import org.apache.hadoop.hbase.client.Delete;
-import org.apache.hadoop.hbase.client.Get;
-import org.apache.hadoop.hbase.client.Put;
-import org.apache.hadoop.hbase.client.RegionInfo;
-import org.apache.hadoop.hbase.client.RegionInfoBuilder;
-import org.apache.hadoop.hbase.client.RegionLocator;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.Table;
-import org.apache.hadoop.hbase.client.TableDescriptor;
-import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
-import org.apache.hadoop.hbase.client.replication.ReplicationAdmin;
-import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
-import org.apache.hadoop.hbase.io.hfile.TestHFile;
-import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
-import org.apache.hadoop.hbase.master.assignment.RegionStates;
-import org.apache.hadoop.hbase.regionserver.HRegion;
-import org.apache.hadoop.hbase.regionserver.HRegionServer;
-import org.apache.hadoop.hbase.regionserver.TestEndToEndSplitTransaction;
-import org.apache.hadoop.hbase.replication.ReplicationFactory;
-import org.apache.hadoop.hbase.replication.ReplicationPeerConfig;
-import org.apache.hadoop.hbase.replication.ReplicationQueues;
-import org.apache.hadoop.hbase.replication.ReplicationQueuesArguments;
-import org.apache.hadoop.hbase.testclassification.LargeTests;
-import org.apache.hadoop.hbase.testclassification.MiscTests;
-import org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker;
-import org.apache.hadoop.hbase.util.hbck.HbckTestingUtil;
-import org.apache.hadoop.hbase.zookeeper.ZKWatcher;
-import org.junit.AfterClass;
-import org.junit.Assert;
-import org.junit.Before;
-import org.junit.BeforeClass;
-import org.junit.Ignore;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-import org.junit.rules.TestName;
-
-@Ignore // Turning off because needs fsck.
-@Category({MiscTests.class, LargeTests.class})
-public class TestHBaseFsckOneRS extends BaseTestHBaseFsck {
- @Rule
- public TestName name = new TestName();
-
- @BeforeClass
- public static void setUpBeforeClass() throws Exception {
- TEST_UTIL.getConfiguration().set(CoprocessorHost.MASTER_COPROCESSOR_CONF_KEY,
- MasterSyncCoprocessor.class.getName());
-
- conf.setInt("hbase.regionserver.handler.count", 2);
- conf.setInt("hbase.regionserver.metahandler.count", 30);
-
- conf.setInt("hbase.htable.threads.max", POOL_SIZE);
- conf.setInt("hbase.hconnection.threads.max", 2 * POOL_SIZE);
- conf.setInt("hbase.hbck.close.timeout", 2 * REGION_ONLINE_TIMEOUT);
- conf.setInt(HConstants.HBASE_RPC_TIMEOUT_KEY, 8 * REGION_ONLINE_TIMEOUT);
- TEST_UTIL.startMiniCluster(1);
-
- tableExecutorService = new ThreadPoolExecutor(1, POOL_SIZE, 60, TimeUnit.SECONDS,
- new SynchronousQueue<>(), Threads.newDaemonThreadFactory("testhbck"));
-
- hbfsckExecutorService = new ScheduledThreadPoolExecutor(POOL_SIZE);
-
- AssignmentManager assignmentManager =
- TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager();
- regionStates = assignmentManager.getRegionStates();
-
- connection = (ClusterConnection) TEST_UTIL.getConnection();
-
- admin = connection.getAdmin();
- admin.setBalancerRunning(false, true);
-
- TEST_UTIL.waitUntilAllRegionsAssigned(TableName.META_TABLE_NAME);
- TEST_UTIL.waitUntilAllRegionsAssigned(TableName.NAMESPACE_TABLE_NAME);
- }
-
- @AfterClass
- public static void tearDownAfterClass() throws Exception {
- tableExecutorService.shutdown();
- hbfsckExecutorService.shutdown();
- admin.close();
- TEST_UTIL.shutdownMiniCluster();
- }
-
- @Before
- public void setUp() {
- EnvironmentEdgeManager.reset();
- }
-
-
- /**
- * This creates a clean table and confirms that the table is clean.
- */
- @Test(timeout=180000)
- public void testHBaseFsckClean() throws Exception {
- assertNoErrors(doFsck(conf, false));
- TableName table = TableName.valueOf("tableClean");
- try {
- HBaseFsck hbck = doFsck(conf, false);
- assertNoErrors(hbck);
-
- setupTable(table);
- assertEquals(ROWKEYS.length, countRows());
-
- // We created 1 table, should be fine
- hbck = doFsck(conf, false);
- assertNoErrors(hbck);
- assertEquals(0, hbck.getOverlapGroups(table).size());
- assertEquals(ROWKEYS.length, countRows());
- } finally {
- cleanupTable(table);
- }
- }
-
- /**
- * Test thread pooling in the case where there are more regions than threads
- */
- @Test (timeout=180000)
- public void testHbckThreadpooling() throws Exception {
- final TableName tableName = TableName.valueOf(name.getMethodName());
- try {
- // Create table with 4 regions
- setupTable(tableName);
-
- // limit number of threads to 1.
- Configuration newconf = new Configuration(conf);
- newconf.setInt("hbasefsck.numthreads", 1);
- assertNoErrors(doFsck(newconf, false));
-
- // We should pass without triggering a RejectedExecutionException
- } finally {
- cleanupTable(tableName);
- }
- }
-
- @Test (timeout=180000)
- public void testTableWithNoRegions() throws Exception {
- // We might end up with empty regions in a table
- // see also testNoHdfsTable()
- final TableName tableName = TableName.valueOf(name.getMethodName());
- try {
- // create table with one region
- HTableDescriptor desc = new HTableDescriptor(tableName);
- HColumnDescriptor hcd = new HColumnDescriptor(Bytes.toString(FAM));
- desc.addFamily(hcd); // If a table has no CF's it doesn't get checked
- createTable(TEST_UTIL, desc, null);
- tbl = connection.getTable(tableName, tableExecutorService);
-
- // Mess it up by leaving a hole in the assignment, meta, and hdfs data
- deleteRegion(conf, tbl.getTableDescriptor(), HConstants.EMPTY_START_ROW,
- HConstants.EMPTY_END_ROW, false, false, true);
-
- HBaseFsck hbck = doFsck(conf, false);
- assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
- HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_HDFS });
-
- doFsck(conf, true);
-
- // fix hole
- doFsck(conf, true);
-
- // check that hole fixed
- assertNoErrors(doFsck(conf, false));
- } finally {
- cleanupTable(tableName);
- }
- }
-
- @Test (timeout=180000)
- public void testHbckFixOrphanTable() throws Exception {
- final TableName tableName = TableName.valueOf(name.getMethodName());
- FileSystem fs = null;
- Path tableinfo = null;
- try {
- setupTable(tableName);
-
- Path hbaseTableDir = FSUtils.getTableDir(
- FSUtils.getRootDir(conf), tableName);
- fs = hbaseTableDir.getFileSystem(conf);
- FileStatus status = FSTableDescriptors.getTableInfoPath(fs, hbaseTableDir);
- tableinfo = status.getPath();
- fs.rename(tableinfo, new Path("/.tableinfo"));
-
- //to report error if .tableinfo is missing.
- HBaseFsck hbck = doFsck(conf, false);
- assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
- HBaseFsck.ErrorReporter.ERROR_CODE.NO_TABLEINFO_FILE });
-
- // fix OrphanTable with default .tableinfo (htd not yet cached on master)
- hbck = doFsck(conf, true);
- assertNoErrors(hbck);
- status = null;
- status = FSTableDescriptors.getTableInfoPath(fs, hbaseTableDir);
- assertNotNull(status);
-
- HTableDescriptor htd = new HTableDescriptor(admin.getTableDescriptor(tableName));
- htd.setValue("NOT_DEFAULT", "true");
- admin.disableTable(tableName);
- admin.modifyTable(tableName, htd);
- admin.enableTable(tableName);
- fs.delete(status.getPath(), true);
-
- // fix OrphanTable with cache
- htd = admin.getTableDescriptor(tableName); // warms up cached htd on master
- hbck = doFsck(conf, true);
- assertNoErrors(hbck);
- status = FSTableDescriptors.getTableInfoPath(fs, hbaseTableDir);
- assertNotNull(status);
- htd = admin.getTableDescriptor(tableName);
- assertEquals(htd.getValue("NOT_DEFAULT"), "true");
- } finally {
- if (fs != null) {
- fs.rename(new Path("/.tableinfo"), tableinfo);
- }
- cleanupTable(tableName);
- }
- }
-
- @Test (timeout=180000)
- public void testReadOnlyProperty() throws Exception {
- HBaseFsck hbck = doFsck(conf, false);
- Assert.assertEquals("shouldIgnorePreCheckPermission", true,
- hbck.shouldIgnorePreCheckPermission());
-
- hbck = doFsck(conf, true);
- Assert.assertEquals("shouldIgnorePreCheckPermission", false,
- hbck.shouldIgnorePreCheckPermission());
-
- hbck = doFsck(conf, true);
- hbck.setIgnorePreCheckPermission(true);
- Assert.assertEquals("shouldIgnorePreCheckPermission", true,
- hbck.shouldIgnorePreCheckPermission());
- }
-
- /**
- * This creates and fixes a bad table where a region is completely contained
- * by another region, and there is a hole (sort of like a bad split)
- */
- @Test (timeout=180000)
- public void testOverlapAndOrphan() throws Exception {
- final TableName tableName = TableName.valueOf(name.getMethodName());
- try {
- setupTable(tableName);
- assertEquals(ROWKEYS.length, countRows());
-
- // Mess it up by creating an overlap in the metadata
- admin.disableTable(tableName);
- deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"), Bytes.toBytes("B"), true,
- true, false, true, RegionInfo.DEFAULT_REPLICA_ID);
- admin.enableTable(tableName);
-
- RegionInfo hriOverlap =
- createRegion(tbl.getTableDescriptor(), Bytes.toBytes("A2"), Bytes.toBytes("B"));
- TEST_UTIL.assignRegion(hriOverlap);
-
- ServerName server = regionStates.getRegionServerOfRegion(hriOverlap);
- TEST_UTIL.assertRegionOnServer(hriOverlap, server, REGION_ONLINE_TIMEOUT);
-
- HBaseFsck hbck = doFsck(conf, false);
- assertErrors(hbck,
- new HBaseFsck.ErrorReporter.ERROR_CODE[] {
- HBaseFsck.ErrorReporter.ERROR_CODE.ORPHAN_HDFS_REGION,
- HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
- HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN });
-
- // fix the problem.
- doFsck(conf, true);
-
- // verify that overlaps are fixed
- HBaseFsck hbck2 = doFsck(conf,false);
- assertNoErrors(hbck2);
- assertEquals(0, hbck2.getOverlapGroups(tableName).size());
- assertEquals(ROWKEYS.length, countRows());
- } finally {
- cleanupTable(tableName);
- }
- }
-
- /**
- * This creates and fixes a bad table where a region overlaps two regions --
- * a start key contained in another region and its end key is contained in
- * yet another region.
- */
- @Test (timeout=180000)
- public void testCoveredStartKey() throws Exception {
- final TableName tableName = TableName.valueOf(name.getMethodName());
- try {
- setupTable(tableName);
- assertEquals(ROWKEYS.length, countRows());
-
- // Mess it up by creating an overlap in the metadata
- RegionInfo hriOverlap =
- createRegion(tbl.getTableDescriptor(), Bytes.toBytes("A2"), Bytes.toBytes("B2"));
- TEST_UTIL.assignRegion(hriOverlap);
-
- ServerName server = regionStates.getRegionServerOfRegion(hriOverlap);
- TEST_UTIL.assertRegionOnServer(hriOverlap, server, REGION_ONLINE_TIMEOUT);
-
- HBaseFsck hbck = doFsck(conf, false);
- assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
- HBaseFsck.ErrorReporter.ERROR_CODE.OVERLAP_IN_REGION_CHAIN,
- HBaseFsck.ErrorReporter.ERROR_CODE.OVERLAP_IN_REGION_CHAIN });
- assertEquals(3, hbck.getOverlapGroups(tableName).size());
- assertEquals(ROWKEYS.length, countRows());
-
- // fix the problem.
- doFsck(conf, true);
-
- // verify that overlaps are fixed
- HBaseFsck hbck2 = doFsck(conf, false);
- assertErrors(hbck2, new HBaseFsck.ErrorReporter.ERROR_CODE[0]);
- assertEquals(0, hbck2.getOverlapGroups(tableName).size());
- assertEquals(ROWKEYS.length, countRows());
- } finally {
- cleanupTable(tableName);
- }
- }
-
- /**
- * This creates and fixes a bad table with a missing region -- hole in meta
- * and data missing in the fs.
- */
- @Test (timeout=180000)
- public void testRegionHole() throws Exception {
- final TableName tableName = TableName.valueOf(name.getMethodName());
- try {
- setupTable(tableName);
- assertEquals(ROWKEYS.length, countRows());
-
- // Mess it up by leaving a hole in the assignment, meta, and hdfs data
- admin.disableTable(tableName);
- deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"), Bytes.toBytes("C"), true,
- true, true);
- admin.enableTable(tableName);
-
- HBaseFsck hbck = doFsck(conf, false);
- assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
- HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN });
- // holes are separate from overlap groups
- assertEquals(0, hbck.getOverlapGroups(tableName).size());
-
- // fix hole
- doFsck(conf, true);
-
- // check that hole fixed
- assertNoErrors(doFsck(conf,false));
- assertEquals(ROWKEYS.length - 2, countRows()); // lost a region so lost a row
- } finally {
- cleanupTable(tableName);
- }
- }
-
- /**
- * The region is not deployed when the table is disabled.
- */
- @Test (timeout=180000)
- public void testRegionShouldNotBeDeployed() throws Exception {
- final TableName tableName = TableName.valueOf(name.getMethodName());
- try {
- LOG.info("Starting testRegionShouldNotBeDeployed.");
- MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
- assertTrue(cluster.waitForActiveAndReadyMaster());
-
-
- byte[][] SPLIT_KEYS = new byte[][] { new byte[0], Bytes.toBytes("aaa"),
- Bytes.toBytes("bbb"), Bytes.toBytes("ccc"), Bytes.toBytes("ddd") };
- TableDescriptor htdDisabled = TableDescriptorBuilder.newBuilder(tableName)
- .addColumnFamily(ColumnFamilyDescriptorBuilder.of(FAM))
- .build();
-
- // Write the .tableinfo
- FSTableDescriptors fstd = new FSTableDescriptors(conf);
- fstd.createTableDescriptor(htdDisabled);
- List<RegionInfo> disabledRegions =
- TEST_UTIL.createMultiRegionsInMeta(conf, htdDisabled, SPLIT_KEYS);
-
- // Let's just assign everything to first RS
- HRegionServer hrs = cluster.getRegionServer(0);
-
- // Create region files.
- admin.disableTable(tableName);
- admin.enableTable(tableName);
-
- // Disable the table and close its regions
- admin.disableTable(tableName);
- RegionInfo region = disabledRegions.remove(0);
- byte[] regionName = region.getRegionName();
-
- // The region should not be assigned currently
- assertTrue(cluster.getServerWith(regionName) == -1);
-
- // Directly open a region on a region server.
- // If going through AM/ZK, the region won't be open.
- // Even it is opened, AM will close it which causes
- // flakiness of this test.
- HRegion r = HRegion.openHRegion(
- region, htdDisabled, hrs.getWAL(region), conf);
- hrs.addRegion(r);
-
- HBaseFsck hbck = doFsck(conf, false);
- assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
- HBaseFsck.ErrorReporter.ERROR_CODE.SHOULD_NOT_BE_DEPLOYED });
-
- // fix this fault
- doFsck(conf, true);
-
- // check result
- assertNoErrors(doFsck(conf, false));
- } finally {
- admin.enableTable(tableName);
- cleanupTable(tableName);
- }
- }
-
- /**
- * This test makes sure that parallel instances of Hbck is disabled.
- *
- * @throws Exception
- */
- @Test(timeout=180000)
- public void testParallelHbck() throws Exception {
- final ExecutorService service;
- final Future<HBaseFsck> hbck1,hbck2;
-
- class RunHbck implements Callable<HBaseFsck> {
- boolean fail = true;
- @Override
- public HBaseFsck call(){
- Configuration c = new Configuration(conf);
- c.setInt("hbase.hbck.lockfile.attempts", 1);
- // HBASE-13574 found that in HADOOP-2.6 and later, the create file would internally retry.
- // To avoid flakiness of the test, set low max wait time.
- c.setInt("hbase.hbck.lockfile.maxwaittime", 3);
- try{
- return doFsck(c, true); // Exclusive hbck only when fixing
- } catch(Exception e){
- if (e.getMessage().contains("Duplicate hbck")) {
- fail = false;
- }
- }
- // If we reach here, then an exception was caught
- if (fail) fail();
- return null;
- }
- }
- service = Executors.newFixedThreadPool(2);
- hbck1 = service.submit(new RunHbck());
- hbck2 = service.submit(new RunHbck());
- service.shutdown();
- //wait for 15 seconds, for both hbck calls finish
- service.awaitTermination(15, TimeUnit.SECONDS);
- HBaseFsck h1 = hbck1.get();
- HBaseFsck h2 = hbck2.get();
- // Make sure only one of the calls was successful
- assert(h1 == null || h2 == null);
- if (h1 != null) {
- assert(h1.getRetCode() >= 0);
- }
- if (h2 != null) {
- assert(h2.getRetCode() >= 0);
- }
- }
-
- /**
- * This test makes sure that with enough retries both parallel instances
- * of hbck will be completed successfully.
- *
- * @throws Exception
- */
- @Test (timeout=180000)
- public void testParallelWithRetriesHbck() throws Exception {
- final ExecutorService service;
- final Future<HBaseFsck> hbck1,hbck2;
-
- // With the ExponentialBackoffPolicyWithLimit (starting with 200 milliseconds sleep time, and
- // max sleep time of 5 seconds), we can retry around 15 times within 80 seconds before bail out.
- //
- // Note: the reason to use 80 seconds is that in HADOOP-2.6 and later, the create file would
- // retry up to HdfsConstants.LEASE_SOFTLIMIT_PERIOD (60 seconds). See HBASE-13574 for more
- // details.
- final int timeoutInSeconds = 80;
- final int sleepIntervalInMilliseconds = 200;
- final int maxSleepTimeInMilliseconds = 6000;
- final int maxRetryAttempts = 15;
-
- class RunHbck implements Callable<HBaseFsck>{
-
- @Override
- public HBaseFsck call() throws Exception {
- // Increase retry attempts to make sure the non-active hbck doesn't get starved
- Configuration c = new Configuration(conf);
- c.setInt("hbase.hbck.lockfile.maxwaittime", timeoutInSeconds);
- c.setInt("hbase.hbck.lockfile.attempt.sleep.interval", sleepIntervalInMilliseconds);
- c.setInt("hbase.hbck.lockfile.attempt.maxsleeptime", maxSleepTimeInMilliseconds);
- c.setInt("hbase.hbck.lockfile.attempts", maxRetryAttempts);
- return doFsck(c, false);
- }
- }
-
- service = Executors.newFixedThreadPool(2);
- hbck1 = service.submit(new RunHbck());
- hbck2 = service.submit(new RunHbck());
- service.shutdown();
- //wait for some time, for both hbck calls finish
- service.awaitTermination(timeoutInSeconds * 2, TimeUnit.SECONDS);
- HBaseFsck h1 = hbck1.get();
- HBaseFsck h2 = hbck2.get();
- // Both should be successful
- assertNotNull(h1);
- assertNotNull(h2);
- assert(h1.getRetCode() >= 0);
- assert(h2.getRetCode() >= 0);
-
- }
-
- @Test (timeout = 180000)
- public void testRegionBoundariesCheck() throws Exception {
- HBaseFsck hbck = doFsck(conf, false);
- assertNoErrors(hbck); // no errors
- try {
- hbck.connect(); // need connection to have access to META
- hbck.checkRegionBoundaries();
- } catch (IllegalArgumentException e) {
- if (e.getMessage().endsWith("not a valid DFS filename.")) {
- fail("Table directory path is not valid." + e.getMessage());
- }
- } finally {
- hbck.close();
- }
- }
-
- /**
- * test region boundaries and make sure store file had been created.
- * @throws Exception
- */
- @Test(timeout = 180000)
- public void testRegionBoundariesCheckWithFlushTable() throws Exception {
- HBaseFsck hbck = doFsck(conf, false);
- assertNoErrors(hbck); // no errors
- final TableName tableName = TableName.valueOf(name.getMethodName());
- try {
- setupTable(tableName);
- admin.flush(tableName);
- hbck.connect(); // need connection to have access to META
- hbck.checkRegionBoundaries();
- assertNoErrors(hbck); // no errors
- } catch (IllegalArgumentException e) {
- if (e.getMessage().endsWith("not a valid DFS filename.")) {
- fail("Table directory path is not valid." + e.getMessage());
- }
- } finally {
- hbck.close();
- }
- }
-
- /**
- * Merges the regions holding row keys "A" and "B" while the CatalogJanitor is disabled, then
- * verifies that no rows were lost and that hbck reports no errors.
- */
- @Test (timeout=180000)
- public void testHbckAfterRegionMerge() throws Exception {
- final TableName tableName = TableName.valueOf(name.getMethodName());
- try {
- // disable CatalogJanitor
- TEST_UTIL.getHBaseCluster().getMaster().setCatalogJanitorEnabled(false);
- setupTable(tableName);
- assertEquals(ROWKEYS.length, countRows());
-
- try(RegionLocator rl = connection.getRegionLocator(tbl.getName())) {
- // make sure data in regions, if in wal only there is no data loss
- admin.flush(tableName);
- RegionInfo region1 = rl.getRegionLocation(Bytes.toBytes("A")).getRegionInfo();
- RegionInfo region2 = rl.getRegionLocation(Bytes.toBytes("B")).getRegionInfo();
-
- int regionCountBeforeMerge = rl.getAllRegionLocations().size();
-
- assertNotEquals(region1, region2);
-
- // do a region merge
- admin.mergeRegionsAsync(
- region1.getEncodedNameAsBytes(), region2.getEncodedNameAsBytes(), false);
-
- // wait until region merged: poll until the region count drops, for at most 30 seconds
- long timeout = System.currentTimeMillis() + 30 * 1000;
- while (true) {
- if (rl.getAllRegionLocations().size() < regionCountBeforeMerge) {
- break;
- } else if (System.currentTimeMillis() > timeout) {
- fail("Time out waiting on region " + region1.getEncodedName() + " and " + region2
- .getEncodedName() + " be merged");
- }
- Thread.sleep(10);
- }
-
- assertEquals(ROWKEYS.length, countRows());
-
- HBaseFsck hbck = doFsck(conf, false);
- assertNoErrors(hbck); // no errors
- }
-
- } finally {
- // always re-enable the CatalogJanitor so later tests see the default behaviour
- TEST_UTIL.getHBaseCluster().getMaster().setCatalogJanitorEnabled(true);
- cleanupTable(tableName);
- }
- }
- /**
- * Leaves a table's entries in hbase:meta but deletes all of its region directories and its
- * table directory from hdfs. The hbck fix run should then cleanly remove the table.
- */
- @Test (timeout=180000)
- public void testNoHdfsTable() throws Exception {
- final TableName tableName = TableName.valueOf(name.getMethodName());
- setupTable(tableName);
- assertEquals(ROWKEYS.length, countRows());
-
- // make sure the data is in the regions; if it were only in the wal there would be no data loss
- admin.flush(tableName);
-
- // Mess it up by deleting the hdfs dir of each of the four regions
- deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes(""),
- Bytes.toBytes("A"), false, false, true); // don't rm meta
- deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"),
- Bytes.toBytes("B"), false, false, true); // don't rm meta
- deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
- Bytes.toBytes("C"), false, false, true); // don't rm meta
- deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("C"),
- Bytes.toBytes(""), false, false, true); // don't rm meta
-
- // also remove the table directory in hdfs
- deleteTableDir(tableName);
-
- HBaseFsck hbck = doFsck(conf, false);
- assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
- HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_HDFS,
- HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_HDFS,
- HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_HDFS,
- HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_HDFS,
- HBaseFsck.ErrorReporter.ERROR_CODE.ORPHAN_TABLE_STATE, });
- // missing regions are not counted as overlap groups
- assertEquals(0, hbck.getOverlapGroups(tableName).size());
-
- // run the fix
- doFsck(conf, true); // detect dangling regions and remove those
-
- // check that nothing is reported any more and that the table itself is gone
- assertNoErrors(doFsck(conf,false));
- assertFalse("Table " + tableName + " should have been deleted", admin.tableExists(tableName));
- }
-
- /**
- * When the hbase.version file is missing, hbck should report it and the fix run should clear it.
- */
- @Test (timeout=180000)
- public void testNoVersionFile() throws Exception {
- // delete the hbase.version file
- Path rootDir = FSUtils.getRootDir(conf);
- FileSystem fs = rootDir.getFileSystem(conf);
- Path versionFile = new Path(rootDir, HConstants.VERSION_FILE_NAME);
- fs.delete(versionFile, true);
-
- // the missing file must be the only error reported
- HBaseFsck hbck = doFsck(conf, false);
- assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
- HBaseFsck.ErrorReporter.ERROR_CODE.NO_VERSION_FILE });
- // fix hbase.version missing
- doFsck(conf, true);
-
- // after the fix run a plain check must come back clean
- assertNoErrors(doFsck(conf, false));
- }
-
- @Test (timeout=180000)
- public void testNoTableState() throws Exception {
- // delete the table state of a freshly created table and check that hbck detects and fixes it
- final TableName tableName = TableName.valueOf(name.getMethodName());
- try {
- setupTable(tableName);
- // make sure the data is in the regions; if it were only in the wal there would be no data loss
- admin.flush(tableName);
-
- MetaTableAccessor.deleteTableState(TEST_UTIL.getConnection(), tableName);
-
- // the missing table state must be the only error reported
- HBaseFsck hbck = doFsck(conf, false);
- assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
- HBaseFsck.ErrorReporter.ERROR_CODE.NO_TABLE_STATE });
- // fix table state missing
- doFsck(conf, true);
-
- assertNoErrors(doFsck(conf, false));
- assertTrue(TEST_UTIL.getAdmin().isTableEnabled(tableName));
- } finally {
- cleanupTable(tableName);
- }
- }
-
- /**
- * Creates two tables, messes up both of them, and then fixes them one table at a time.
- */
- @Test (timeout=180000)
- public void testFixByTable() throws Exception {
- final TableName tableName1 = TableName.valueOf(name.getMethodName() + "1");
- final TableName tableName2 = TableName.valueOf(name.getMethodName() + "2");
- try {
- setupTable(tableName1);
- // make sure the data is in the regions; if it were only in the wal there would be no data loss
- admin.flush(tableName1);
- // Mess table 1 up by leaving a hole in the hdfs data
- deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
- Bytes.toBytes("C"), false, false, true); // don't rm meta
-
- setupTable(tableName2);
- // make sure the data is in the regions; if it were only in the wal there would be no data loss
- admin.flush(tableName2);
- // Mess table 2 up by leaving a hole in the hdfs data
- deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"), Bytes.toBytes("C"), false,
- false, true); // don't rm meta
-
- HBaseFsck hbck = doFsck(conf, false);
- assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
- HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_HDFS,
- HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_HDFS });
-
- // fix hole in table 1
- doFsck(conf, true, tableName1);
- // check that hole in table 1 fixed
- assertNoErrors(doFsck(conf, false, tableName1));
- // check that hole in table 2 still there
- assertErrors(doFsck(conf, false, tableName2), new HBaseFsck.ErrorReporter.ERROR_CODE[] {
- HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_HDFS });
-
- // fix hole in table 2
- doFsck(conf, true, tableName2);
- // check that the holes in both tables are fixed
- assertNoErrors(doFsck(conf, false));
- assertEquals(ROWKEYS.length - 2, countRows());
- } finally {
- cleanupTable(tableName1);
- cleanupTable(tableName2);
- }
- }
- /**
- * A split parent in meta, in hdfs, and not deployed. A regular repair must not clear it;
- * only a run with fixSplitParents enabled removes the split qualifiers from the parent row.
- */
- @Test (timeout=180000)
- public void testLingeringSplitParent() throws Exception {
- final TableName tableName = TableName.valueOf(name.getMethodName());
- Table meta = null;
- try {
- setupTable(tableName);
- assertEquals(ROWKEYS.length, countRows());
-
- // make sure data in regions, if in wal only there is no data loss
- admin.flush(tableName);
-
- HRegionLocation location;
- try(RegionLocator rl = connection.getRegionLocator(tbl.getName())) {
- location = rl.getRegionLocation(Bytes.toBytes("B"));
- }
-
- // Delete one region from meta, but not hdfs, unassign it.
- deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
- Bytes.toBytes("C"), true, true, false);
-
- // Create a new meta entry to fake it as a split parent.
- meta = connection.getTable(TableName.META_TABLE_NAME, tableExecutorService);
- RegionInfo a = RegionInfoBuilder.newBuilder(tbl.getName())
- .setStartKey(Bytes.toBytes("B"))
- .setEndKey(Bytes.toBytes("BM"))
- .build();
- RegionInfo b = RegionInfoBuilder.newBuilder(tbl.getName())
- .setStartKey(Bytes.toBytes("BM"))
- .setEndKey(Bytes.toBytes("C"))
- .build();
- RegionInfo hri = RegionInfoBuilder.newBuilder(location.getRegion())
- .setOffline(true)
- .setSplit(true)
- .build();
-
- MetaTableAccessor.addRegionToMeta(meta, hri, a, b);
- // meta is read again further down, so it stays open here; the finally block closes it
- admin.flush(TableName.META_TABLE_NAME);
-
- HBaseFsck hbck = doFsck(conf, false);
- assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
- HBaseFsck.ErrorReporter.ERROR_CODE.LINGERING_SPLIT_PARENT,
- HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN});
-
- // regular repair cannot fix lingering split parent
- hbck = doFsck(conf, true);
- assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
- HBaseFsck.ErrorReporter.ERROR_CODE.LINGERING_SPLIT_PARENT,
- HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN });
- assertFalse(hbck.shouldRerun());
- hbck = doFsck(conf, false);
- assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
- HBaseFsck.ErrorReporter.ERROR_CODE.LINGERING_SPLIT_PARENT,
- HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN});
-
- // fix lingering split parent
- hbck = new HBaseFsck(conf, hbfsckExecutorService);
- hbck.connect();
- HBaseFsck.setDisplayFullReport(); // i.e. -details
- hbck.setTimeLag(0);
- hbck.setFixSplitParents(true);
- hbck.onlineHbck();
- assertTrue(hbck.shouldRerun());
- hbck.close();
-
- // the split qualifiers must be gone from the parent's row in meta
- Get get = new Get(hri.getRegionName());
- Result result = meta.get(get);
- assertTrue(result.getColumnCells(HConstants.CATALOG_FAMILY,
- HConstants.SPLITA_QUALIFIER).isEmpty());
- assertTrue(result.getColumnCells(HConstants.CATALOG_FAMILY,
- HConstants.SPLITB_QUALIFIER).isEmpty());
- admin.flush(TableName.META_TABLE_NAME);
-
- // fix other issues
- doFsck(conf, true);
-
- // check that all are fixed
- assertNoErrors(doFsck(conf, false));
- assertEquals(ROWKEYS.length, countRows());
- } finally {
- cleanupTable(tableName);
- IOUtils.closeQuietly(meta);
- }
- }
-
- /**
- * Tests that LINGERING_SPLIT_PARENT is not erroneously reported for
- * valid cases where the daughters are there.
- */
- @Test (timeout=180000)
- public void testValidLingeringSplitParent() throws Exception {
- final TableName tableName = TableName.valueOf(name.getMethodName());
- Table meta = null;
- try {
- setupTable(tableName);
- assertEquals(ROWKEYS.length, countRows());
-
- // make sure data in regions, if in wal only there is no data loss
- admin.flush(tableName);
-
- try(RegionLocator rl = connection.getRegionLocator(tbl.getName())) {
- HRegionLocation location = rl.getRegionLocation(Bytes.toBytes("B"));
-
- meta = connection.getTable(TableName.META_TABLE_NAME, tableExecutorService);
- RegionInfo hri = location.getRegionInfo();
-
- // do a regular split
- byte[] regionName = hri.getRegionName();
- admin.splitRegion(regionName, Bytes.toBytes("BM"));
- TestEndToEndSplitTransaction.blockUntilRegionSplit(conf, 60000, regionName, true);
-
- // TODO: fixHdfsHoles does not work against splits, since the parent dir lingers on
- // for some time until children references are deleted. HBCK erroneously sees this as
- // overlapping regions
- HBaseFsck hbck = doFsck(conf, true, true, false, false, false, true, true, true, true,
- false, false, false, null);
- // no LINGERING_SPLIT_PARENT reported
- assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {});
-
- // assert that the split hbase:meta entry is still there.
- Get get = new Get(hri.getRegionName());
- Result result = meta.get(get);
- assertNotNull(result);
- assertNotNull(MetaTableAccessor.getRegionInfo(result));
-
- assertEquals(ROWKEYS.length, countRows());
-
- // assert that we still have the split regions
- // SPLITS + 1 is # regions pre-split; expected value goes first so failures read correctly
- assertEquals(SPLITS.length + 1 + 1, rl.getStartKeys().length);
- assertNoErrors(doFsck(conf, false));
- }
- } finally {
- cleanupTable(tableName);
- IOUtils.closeQuietly(meta);
- }
- }
-
- /**
- * Split crashed after write to hbase:meta finished for the parent region, but
- * failed to write daughters (pre HBASE-7721 codebase)
- */
- @Test(timeout=75000)
- public void testSplitDaughtersNotInMeta() throws Exception {
- final TableName tableName = TableName.valueOf(name.getMethodName());
- Table meta = connection.getTable(TableName.META_TABLE_NAME, tableExecutorService);
- try {
- setupTable(tableName);
- assertEquals(ROWKEYS.length, countRows());
-
- // make sure data in regions, if in wal only there is no data loss
- admin.flush(tableName);
-
- try(RegionLocator rl = connection.getRegionLocator(tbl.getName())) {
- HRegionLocation location = rl.getRegionLocation(Bytes.toBytes("B"));
-
- RegionInfo hri = location.getRegionInfo();
-
- // Disable CatalogJanitor to prevent it from cleaning up the parent region
- // after split.
- admin.enableCatalogJanitor(false);
-
- // do a regular split
- byte[] regionName = hri.getRegionName();
- admin.splitRegion(regionName, Bytes.toBytes("BM"));
- TestEndToEndSplitTransaction.blockUntilRegionSplit(conf, 60000, regionName, true);
-
- PairOfSameType<RegionInfo> daughters = MetaTableAccessor.getDaughterRegions(
- meta.get(new Get(regionName)));
-
- // Delete daughter regions from meta, but not hdfs, unassign it.
-
- ServerName firstSN =
- rl.getRegionLocation(daughters.getFirst().getStartKey()).getServerName();
- ServerName secondSN =
- rl.getRegionLocation(daughters.getSecond().getStartKey()).getServerName();
-
- undeployRegion(connection, firstSN, daughters.getFirst());
- undeployRegion(connection, secondSN, daughters.getSecond());
-
- List<Delete> deletes = new ArrayList<>(2);
- deletes.add(new Delete(daughters.getFirst().getRegionName()));
- deletes.add(new Delete(daughters.getSecond().getRegionName()));
- meta.delete(deletes);
-
- // Remove daughters from regionStates
- RegionStates regionStates = TEST_UTIL.getMiniHBaseCluster().getMaster().
- getAssignmentManager().getRegionStates();
- regionStates.deleteRegion(daughters.getFirst());
- regionStates.deleteRegion(daughters.getSecond());
-
- HBaseFsck hbck = doFsck(conf, false);
- assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
- HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
- HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
- HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN }); //no LINGERING_SPLIT_PARENT
-
- // now fix it. The fix should not revert the region split, but add daughters to META
- hbck = doFsck(conf, true, true, false, false, false, false, false, false, false,
- false, false, false, null);
- assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
- HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
- HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
- HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN });
-
- // assert that the split hbase:meta entry is still there.
- Get get = new Get(hri.getRegionName());
- Result result = meta.get(get);
- assertNotNull(result);
- assertNotNull(MetaTableAccessor.getRegionInfo(result));
-
- assertEquals(ROWKEYS.length, countRows());
-
- // assert that we still have the split regions
- // SPLITS + 1 is # regions pre-split; expected value goes first so failures read correctly
- assertEquals(SPLITS.length + 1 + 1, rl.getStartKeys().length);
- assertNoErrors(doFsck(conf, false)); //should be fixed by now
- }
- } finally {
- admin.enableCatalogJanitor(true);
- meta.close();
- cleanupTable(tableName);
- }
- }
-
- /**
- * Creates and then fixes a bad table whose first region (start key "" up to "A") is missing:
- * it is taken out of assignment, meta and the fs while the table is disabled.
- */
- @Test(timeout=120000)
- public void testMissingFirstRegion() throws Exception {
- final TableName tableName = TableName.valueOf(name.getMethodName());
- try {
- setupTable(tableName);
- assertEquals(ROWKEYS.length, countRows());
-
- // Mess it up by leaving a hole in the assignment, meta, and hdfs data
- admin.disableTable(tableName);
- deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes(""), Bytes.toBytes("A"), true,
- true, true);
- admin.enableTable(tableName);
-
- HBaseFsck hbck = doFsck(conf, false);
- assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
- HBaseFsck.ErrorReporter.ERROR_CODE.FIRST_REGION_STARTKEY_NOT_EMPTY });
- // run the fix
- doFsck(conf, true);
- // check that the hole is fixed
- assertNoErrors(doFsck(conf, false));
- } finally {
- cleanupTable(tableName);
- }
- }
-
- /**
- * Creates and then fixes a bad table whose first region has lost its region dir in the fs
- * while it is left untouched in meta and in assignment.
- */
- @Test(timeout=120000)
- public void testRegionDeployedNotInHdfs() throws Exception {
- final TableName tableName = TableName.valueOf(name.getMethodName());
- try {
- setupTable(tableName);
- admin.flush(tableName);
-
- // Mess it up by deleting region dir
- deleteRegion(conf, tbl.getTableDescriptor(),
- HConstants.EMPTY_START_ROW, Bytes.toBytes("A"), false,
- false, true);
-
- HBaseFsck hbck = doFsck(conf, false);
- assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
- HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_HDFS });
- // run the fix
- doFsck(conf, true);
- // check that nothing is reported any more
- assertNoErrors(doFsck(conf, false));
- } finally {
- cleanupTable(tableName);
- }
- }
-
- /**
- * Creates and then fixes a bad table whose last region (start key "C" up to "") is missing:
- * it is taken out of assignment, meta and the fs while the table is disabled.
- */
- @Test(timeout=120000)
- public void testMissingLastRegion() throws Exception {
- final TableName tableName = TableName.valueOf(name.getMethodName());
- try {
- setupTable(tableName);
- assertEquals(ROWKEYS.length, countRows());
-
- // Mess it up by leaving a hole in the assignment, meta, and hdfs data
- admin.disableTable(tableName);
- deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("C"), Bytes.toBytes(""), true,
- true, true);
- admin.enableTable(tableName);
-
- HBaseFsck hbck = doFsck(conf, false);
- assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
- HBaseFsck.ErrorReporter.ERROR_CODE.LAST_REGION_ENDKEY_NOT_EMPTY });
- // run the fix
- doFsck(conf, true);
- // check that the hole is fixed
- assertNoErrors(doFsck(conf, false));
- } finally {
- cleanupTable(tableName);
- }
- }
-
- /**
- * Tests that the -noHdfsChecking option can detect and fix assignment issues.
- */
- @Test (timeout=180000)
- public void testFixAssignmentsAndNoHdfsChecking() throws Exception {
- final TableName tableName = TableName.valueOf(name.getMethodName());
- try {
- setupTable(tableName);
- assertEquals(ROWKEYS.length, countRows());
-
- // Mess it up by closing a region
- deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"), Bytes.toBytes("B"), true,
- false, false, false, RegionInfo.DEFAULT_REPLICA_ID);
-
- // verify that there are no other errors
- HBaseFsck hbck = doFsck(conf, false);
- assertErrors(hbck,
- new HBaseFsck.ErrorReporter.ERROR_CODE[] {
- HBaseFsck.ErrorReporter.ERROR_CODE.NOT_DEPLOYED,
- HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN });
-
- // verify that noHdfsChecking reports the same errors
- HBaseFsck fsck = new HBaseFsck(conf, hbfsckExecutorService);
- fsck.connect();
- HBaseFsck.setDisplayFullReport(); // i.e. -details
- fsck.setTimeLag(0);
- fsck.setCheckHdfs(false);
- fsck.onlineHbck();
- assertErrors(fsck,
- new HBaseFsck.ErrorReporter.ERROR_CODE[] {
- HBaseFsck.ErrorReporter.ERROR_CODE.NOT_DEPLOYED,
- HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN });
- fsck.close();
-
- // verify that fixAssignments works fine with noHdfsChecking
- fsck = new HBaseFsck(conf, hbfsckExecutorService);
- fsck.connect();
- HBaseFsck.setDisplayFullReport(); // i.e. -details
- fsck.setTimeLag(0);
- fsck.setCheckHdfs(false);
- fsck.setFixAssignments(true);
- fsck.onlineHbck();
- assertTrue(fsck.shouldRerun());
- fsck.onlineHbck();
- assertNoErrors(fsck);
-
- assertEquals(ROWKEYS.length, countRows());
-
- fsck.close();
- } finally {
- cleanupTable(tableName);
- }
- }
-
- /**
- * Tests that the -noHdfsChecking option can detect a region that is deployed but not in meta.
- * However, it cannot fix it without checking Hdfs because we need to get
- * the region info from Hdfs in this case in order to patch the meta.
- */
- @Test (timeout=180000)
- public void testFixMetaNotWorkingWithNoHdfsChecking() throws Exception {
- final TableName tableName = TableName.valueOf(name.getMethodName());
- try {
- setupTable(tableName);
- assertEquals(ROWKEYS.length, countRows());
-
- // Mess it up by deleting a region from the metadata
- deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"),
- Bytes.toBytes("B"), false, true, false, false, RegionInfo.DEFAULT_REPLICA_ID);
-
- // verify that there are no other errors
- HBaseFsck hbck = doFsck(conf, false);
- assertErrors(hbck,
- new HBaseFsck.ErrorReporter.ERROR_CODE[] {
- HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META,
- HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN });
-
- // verify that noHdfsChecking reports the same errors
- HBaseFsck fsck = new HBaseFsck(conf, hbfsckExecutorService);
- fsck.connect();
- HBaseFsck.setDisplayFullReport(); // i.e. -details
- fsck.setTimeLag(0);
- fsck.setCheckHdfs(false);
- fsck.onlineHbck();
- assertErrors(fsck,
- new HBaseFsck.ErrorReporter.ERROR_CODE[] {
- HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META,
- HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN });
- fsck.close();
-
- // verify that fixMeta doesn't work with noHdfsChecking
- fsck = new HBaseFsck(conf, hbfsckExecutorService);
- fsck.connect();
- HBaseFsck.setDisplayFullReport(); // i.e. -details
- fsck.setTimeLag(0);
- fsck.setCheckHdfs(false);
- fsck.setFixAssignments(true);
- fsck.setFixMeta(true);
- fsck.onlineHbck();
- assertFalse(fsck.shouldRerun());
- assertErrors(fsck,
- new HBaseFsck.ErrorReporter.ERROR_CODE[] {
- HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META,
- HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN });
- fsck.close();
-
- // fix the cluster with hdfs checking enabled so other tests won't be impacted
- fsck = doFsck(conf, true);
- assertTrue(fsck.shouldRerun());
- fsck = doFsck(conf, true);
- assertNoErrors(fsck);
- } finally {
- cleanupTable(tableName);
- }
- }
-
- /**
- * Tests that -fixHdfsHoles doesn't work with the -noHdfsChecking option,
- * and that -noHdfsChecking can't detect an orphan Hdfs region.
- */
- @Test (timeout=180000)
- public void testFixHdfsHolesNotWorkingWithNoHdfsChecking() throws Exception {
- final TableName tableName = TableName.valueOf(name.getMethodName());
- try {
- setupTable(tableName);
- assertEquals(ROWKEYS.length, countRows());
-
- // Mess it up: drop the region from "A" to "B" while disabled, then deploy one from "A2" to "B"
- admin.disableTable(tableName);
- deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"), Bytes.toBytes("B"), true,
- true, false, true, RegionInfo.DEFAULT_REPLICA_ID);
- admin.enableTable(tableName);
-
- RegionInfo hriOverlap =
- createRegion(tbl.getTableDescriptor(), Bytes.toBytes("A2"), Bytes.toBytes("B"));
- TEST_UTIL.assignRegion(hriOverlap);
-
- ServerName server = regionStates.getRegionServerOfRegion(hriOverlap);
- TEST_UTIL.assertRegionOnServer(hriOverlap, server, REGION_ONLINE_TIMEOUT);
-
- HBaseFsck hbck = doFsck(conf, false);
- assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
- HBaseFsck.ErrorReporter.ERROR_CODE.ORPHAN_HDFS_REGION,
- HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
- HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN});
-
- // verify that noHdfsChecking can't detect ORPHAN_HDFS_REGION
- HBaseFsck fsck = new HBaseFsck(conf, hbfsckExecutorService);
- fsck.connect();
- HBaseFsck.setDisplayFullReport(); // i.e. -details
- fsck.setTimeLag(0);
- fsck.setCheckHdfs(false);
- fsck.onlineHbck();
- assertErrors(fsck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
- HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN });
- fsck.close();
-
- // verify that fixHdfsHoles doesn't work with noHdfsChecking
- fsck = new HBaseFsck(conf, hbfsckExecutorService);
- fsck.connect();
- HBaseFsck.setDisplayFullReport(); // i.e. -details
- fsck.setTimeLag(0);
- fsck.setCheckHdfs(false);
- fsck.setFixHdfsHoles(true);
- fsck.setFixHdfsOverlaps(true);
- fsck.setFixHdfsOrphans(true);
- fsck.onlineHbck();
- assertFalse(fsck.shouldRerun());
- assertErrors(fsck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
- HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN });
- fsck.close();
- } finally {
- if (admin.isTableDisabled(tableName)) {
- admin.enableTable(tableName);
- }
- cleanupTable(tableName);
- }
- }
-
- /**
- * This creates a table and then corrupts an hfile. Hbck should quarantine the file.
- */
- @Test(timeout=180000)
- public void testQuarantineCorruptHFile() throws Exception {
- final TableName tableName = TableName.valueOf(name.getMethodName());
- try {
- setupTable(tableName);
- assertEquals(ROWKEYS.length, countRows());
- admin.flush(tableName); // flush is async.
-
- FileSystem fs = FileSystem.get(conf);
- Path hfile = getFlushedHFile(fs, tableName);
-
- // Disable the table before planting the corrupt hfile
- admin.disableTable(tableName);
-
- // create new corrupt file called deadbeef (valid hfile name)
- Path corrupt = new Path(hfile.getParent(), "deadbeef");
- TestHFile.truncateFile(fs, hfile, corrupt);
- LOG.info("Created corrupted file " + corrupt);
- HBaseFsck.debugLsr(conf, FSUtils.getRootDir(conf));
-
- // we cannot enable here because enable never finished due to the corrupt region.
- HBaseFsck res = HbckTestingUtil.doHFileQuarantine(conf, tableName);
- // expected value goes first so that a failure message reads the right way round
- assertEquals(0, res.getRetCode());
- HFileCorruptionChecker hfcc = res.getHFilecorruptionChecker();
- assertEquals(5, hfcc.getHFilesChecked());
- assertEquals(1, hfcc.getCorrupted().size());
- assertEquals(0, hfcc.getFailures().size());
- assertEquals(1, hfcc.getQuarantined().size());
- assertEquals(0, hfcc.getMissing().size());
-
- // Its been fixed, verify that we can enable.
- admin.enableTable(tableName);
- } finally {
- cleanupTable(tableName);
- }
- }
-
- /**
- * This creates a table and simulates the race situation where a concurrent compaction or split
- * has removed an hfile after the corruption checker learned about it.
- */
- @Test(timeout=180000)
- public void testQuarantineMissingHFile() throws Exception {
- final TableName tableName = TableName.valueOf(name.getMethodName());
-
- // inject a fault in the hfcc created.
- final FileSystem fs = FileSystem.get(conf);
- HBaseFsck hbck = new HBaseFsck(conf, hbfsckExecutorService) {
- @Override
- public HFileCorruptionChecker createHFileCorruptionChecker(boolean sidelineCorruptHFiles)
- throws IOException {
- return new HFileCorruptionChecker(conf, executor, sidelineCorruptHFiles) {
- // flips to true on the first checkHFile call so that only one hfile gets deleted
- AtomicBoolean attemptedFirstHFile = new AtomicBoolean(false);
- @Override
- protected void checkHFile(Path p) throws IOException {
- if (attemptedFirstHFile.compareAndSet(false, true)) {
- assertTrue(fs.delete(p, true)); // make sure delete happened.
- }
- super.checkHFile(p);
- }
- };
- }
- };
- try {
- doQuarantineTest(tableName, hbck, 4, 0, 0, 0, 1); // 4 attempted, but 1 missing.
- } finally {
- // close even when the quarantine assertions fail so that hbck is not leaked
- hbck.close();
- }
- }
-
- /**
- * Creates and then fixes a bad table that contains a region whose startkey == endkey.
- */
- @Test (timeout=180000)
- public void testDegenerateRegions() throws Exception {
- final TableName tableName = TableName.valueOf(name.getMethodName());
- try {
- setupTable(tableName);
- assertNoErrors(doFsck(conf, false));
- assertEquals(ROWKEYS.length, countRows());
-
- // Now let's mess it up, by adding a region from "B" to "B", i.e. with a duplicate startkey
- RegionInfo hriDupe =
- createRegion(tbl.getTableDescriptor(), Bytes.toBytes("B"), Bytes.toBytes("B"));
- TEST_UTIL.assignRegion(hriDupe);
-
- ServerName server = regionStates.getRegionServerOfRegion(hriDupe);
- TEST_UTIL.assertRegionOnServer(hriDupe, server, REGION_ONLINE_TIMEOUT);
-
- HBaseFsck hbck = doFsck(conf,false);
- assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
- HBaseFsck.ErrorReporter.ERROR_CODE.DEGENERATE_REGION,
- HBaseFsck.ErrorReporter.ERROR_CODE.DUPE_STARTKEYS,
- HBaseFsck.ErrorReporter.ERROR_CODE.DUPE_STARTKEYS });
- assertEquals(2, hbck.getOverlapGroups(tableName).size());
- assertEquals(ROWKEYS.length, countRows());
-
- // fix the degenerate region.
- doFsck(conf, true);
-
- // check that the degenerate region is gone and that no data was lost
- HBaseFsck hbck2 = doFsck(conf,false);
- assertNoErrors(hbck2);
- assertEquals(0, hbck2.getOverlapGroups(tableName).size());
- assertEquals(ROWKEYS.length, countRows());
- } finally {
- cleanupTable(tableName);
- }
- }
-
- /**
- * Tests a missing REGIONINFO_QUALIFIER in hbase:meta: hbck must report EMPTY_META_CELL and
- * the fix run must no longer report it.
- */
- @Test (timeout=180000)
- public void testMissingRegionInfoQualifier() throws Exception {
- final TableName tableName = TableName.valueOf(name.getMethodName());
- // try-with-resources releases the extra connection even when an assertion below fails
- try (Connection connection = ConnectionFactory.createConnection(conf)) {
- setupTable(tableName);
-
- // Mess it up by removing the RegionInfo cell of every region of a non-system table.
- final List<Delete> deletes = new LinkedList<>();
- try (Table meta = connection.getTable(TableName.META_TABLE_NAME, hbfsckExecutorService)) {
- MetaTableAccessor.fullScanRegions(connection, new MetaTableAccessor.Visitor() {
-
- @Override
- public boolean visit(Result rowResult) throws IOException {
- RegionInfo hri = MetaTableAccessor.getRegionInfo(rowResult);
- if (hri != null && !hri.getTable().isSystemTable()) {
- Delete delete = new Delete(rowResult.getRow());
- delete.addColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER);
- deletes.add(delete);
- }
- return true;
- }
- });
- meta.delete(deletes);
-
- // Mess it up by creating a fake hbase:meta entry with no associated RegionInfo
- meta.put(new Put(Bytes.toBytes(tableName + ",,1361911384013.810e28f59a57da91c66"))
- .addColumn(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER,
- Bytes.toBytes("node1:60020")));
- meta.put(new Put(Bytes.toBytes(tableName + ",,1361911384013.810e28f59a57da91c66"))
- .addColumn(HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER,
- Bytes.toBytes(1362150791183L)));
- }
-
- HBaseFsck hbck = doFsck(conf, false);
- assertTrue(hbck.getErrors().getErrorList().contains(
- HBaseFsck.ErrorReporter.ERROR_CODE.EMPTY_META_CELL));
-
- // fix the empty meta cells
- hbck = doFsck(conf, true);
-
- // check that the empty meta cells are no longer reported
- assertFalse(hbck.getErrors().getErrorList().contains(
- HBaseFsck.ErrorReporter.ERROR_CODE.EMPTY_META_CELL));
- } finally {
- cleanupTable(tableName);
- }
- }
-
- /**
- * Test pluggable error reporter. It can be plugged in
- * from system property or configuration.
- */
- @Test (timeout=180000)
- public void testErrorReporter() throws Exception {
- try {
- MockErrorReporter.calledCount = 0;
- doFsck(conf, false);
- // the mock is not configured yet, so it must not have been called
- // (expected value goes first so that a failure message reads the right way round)
- assertEquals(0, MockErrorReporter.calledCount);
-
- conf.set("hbasefsck.errorreporter", MockErrorReporter.class.getName());
- doFsck(conf, false);
- assertTrue(MockErrorReporter.calledCount > 20);
- } finally {
- // put the printing reporter back and reset the counter
- conf.set("hbasefsck.errorreporter",
- HBaseFsck.PrintingErrorReporter.class.getName());
- MockErrorReporter.calledCount = 0;
- }
- }
-
- /**
- * Checks that hbck reports replication queues that belong to a peer that does not exist as
- * UNDELETED_REPLICATION_QUEUE, and that the fix run deletes exactly those queues.
- */
- @Test(timeout=180000)
- public void testCheckReplication() throws Exception {
- // check no errors
- HBaseFsck hbck = doFsck(conf, false);
- assertNoErrors(hbck);
-
- // create peer
- ReplicationAdmin replicationAdmin = new ReplicationAdmin(conf);
- try {
- Assert.assertEquals(0, replicationAdmin.getPeersCount());
- int zkPort = conf.getInt(HConstants.ZOOKEEPER_CLIENT_PORT,
- HConstants.DEFAULT_ZOOKEPER_CLIENT_PORT);
- ReplicationPeerConfig rpc = new ReplicationPeerConfig();
- rpc.setClusterKey("127.0.0.1:" + zkPort + ":/hbase");
- replicationAdmin.addPeer("1", rpc, null);
- Assert.assertEquals(1, replicationAdmin.getPeersCount());
-
- // create replicator
- ZKWatcher zkw = new ZKWatcher(conf, "Test Hbase Fsck", connection);
- try {
- ReplicationQueues repQueues =
- ReplicationFactory.getReplicationQueues(new ReplicationQueuesArguments(conf, connection,
- zkw));
- repQueues.init("server1");
- // queues for current peer, no errors
- repQueues.addLog("1", "file1");
- repQueues.addLog("1-server2", "file1");
- Assert.assertEquals(2, repQueues.getAllQueues().size());
- hbck = doFsck(conf, false);
- assertNoErrors(hbck);
-
- // queues for removed peer
- repQueues.addLog("2", "file1");
- repQueues.addLog("2-server2", "file1");
- Assert.assertEquals(4, repQueues.getAllQueues().size());
- hbck = doFsck(conf, false);
- assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
- HBaseFsck.ErrorReporter.ERROR_CODE.UNDELETED_REPLICATION_QUEUE,
- HBaseFsck.ErrorReporter.ERROR_CODE.UNDELETED_REPLICATION_QUEUE });
-
- // fix the case
- doFsck(conf, true);
- hbck = doFsck(conf, false);
- assertNoErrors(hbck);
- // ensure only the queues of peer "2" are deleted; the ids must match the ones added above
- Assert.assertEquals(2, repQueues.getAllQueues().size());
- Assert.assertNull(repQueues.getLogsInQueue("2"));
- Assert.assertNull(repQueues.getLogsInQueue("2-server2"));
-
- replicationAdmin.removePeer("1");
- repQueues.removeAllQueues();
- } finally {
- zkw.close();
- }
- } finally {
- // closed last, and also on failure, so that neither handle is leaked
- replicationAdmin.close();
- }
- }
-
- /**
-  * Builds a table, damages region [B, C) so that meta has a hole while the region's
-  * data is still in the fs without its .regioninfo (an orphan hdfs region), and then
-  * checks that hbck reports the damage and repairs it.
-  */
- @Test(timeout=180000)
- public void testHDFSRegioninfoMissing() throws Exception {
-   final TableName tableName = TableName.valueOf(name.getMethodName());
-   final byte[] holeStart = Bytes.toBytes("B");
-   final byte[] holeEnd = Bytes.toBytes("C");
-   final HBaseFsck.ErrorReporter.ERROR_CODE[] expectedErrors = {
-     HBaseFsck.ErrorReporter.ERROR_CODE.ORPHAN_HDFS_REGION,
-     HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
-     HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN
-   };
-   try {
-     setupTable(tableName);
-     assertEquals(ROWKEYS.length, countRows());
-
-     // Damage the table while it is disabled, leaving a hole in the meta data
-     admin.disableTable(tableName);
-     deleteRegion(conf, tbl.getTableDescriptor(), holeStart, holeEnd, true, true, false, true,
-         RegionInfo.DEFAULT_REPLICA_ID);
-     admin.enableTable(tableName);
-
-     HBaseFsck report = doFsck(conf, false);
-     assertErrors(report, expectedErrors);
-     // holes are separate from overlap groups
-     assertEquals(0, report.getOverlapGroups(tableName).size());
-
-     // repairing run
-     doFsck(conf, true);
-
-     // a read-only run must now be clean and all rows must be readable
-     HBaseFsck recheck = doFsck(conf, false);
-     assertNoErrors(recheck);
-     assertEquals(ROWKEYS.length, countRows());
-   } finally {
-     cleanupTable(tableName);
-   }
- }
-
- /**
-  * Builds a table, makes region [B, C) disappear from meta and from its region server
-  * (its files stay in the fs), and checks that hbck reports the hole and repairs it.
-  */
- @Test (timeout=180000)
- public void testNotInMetaOrDeployedHole() throws Exception {
-   final TableName tableName = TableName.valueOf(name.getMethodName());
-   final byte[] holeStart = Bytes.toBytes("B");
-   final byte[] holeEnd = Bytes.toBytes("C");
-   final HBaseFsck.ErrorReporter.ERROR_CODE[] holeErrors = {
-     HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
-     HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN
-   };
-   try {
-     setupTable(tableName);
-     assertEquals(ROWKEYS.length, countRows());
-
-     // Damage the table while it is disabled, leaving a hole in the meta data
-     admin.disableTable(tableName);
-     // last flag false: don't rm from fs
-     deleteRegion(conf, tbl.getTableDescriptor(), holeStart, holeEnd, true, true, false);
-     admin.enableTable(tableName);
-
-     HBaseFsck report = doFsck(conf, false);
-     assertErrors(report, holeErrors);
-     // holes are separate from overlap groups
-     assertEquals(0, report.getOverlapGroups(tableName).size());
-
-     // the fixing run still reports the errors it is repairing
-     HBaseFsck fixRun = doFsck(conf, true);
-     assertErrors(fixRun, holeErrors);
-
-     // a read-only run must now be clean and all rows must be readable
-     HBaseFsck recheck = doFsck(conf, false);
-     assertNoErrors(recheck);
-     assertEquals(ROWKEYS.length, countRows());
-   } finally {
-     cleanupTable(tableName);
-   }
- }
-
- /**
-  * This creates and fixes a bad table with a hole in meta: region [B, C) is dropped
-  * from meta while its files stay in the fs.
-  */
- @Test (timeout=180000)
- public void testNotInMetaHole() throws Exception {
-   final TableName tableName = TableName.valueOf(name.getMethodName());
-   final byte[] holeStart = Bytes.toBytes("B");
-   final byte[] holeEnd = Bytes.toBytes("C");
-   final HBaseFsck.ErrorReporter.ERROR_CODE[] holeErrors = {
-     HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
-     HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN
-   };
-   try {
-     setupTable(tableName);
-     assertEquals(ROWKEYS.length, countRows());
-
-     // Damage the table while it is disabled, leaving a hole in the meta data
-     admin.disableTable(tableName);
-     // last flag false: don't rm from fs
-     deleteRegion(conf, tbl.getTableDescriptor(), holeStart, holeEnd, false, true, false);
-     admin.enableTable(tableName);
-
-     HBaseFsck report = doFsck(conf, false);
-     assertErrors(report, holeErrors);
-     // holes are separate from overlap groups
-     assertEquals(0, report.getOverlapGroups(tableName).size());
-
-     // the fixing run still reports the errors it is repairing
-     HBaseFsck fixRun = doFsck(conf, true);
-     assertErrors(fixRun, holeErrors);
-
-     // a read-only run must now be clean and all rows must be readable
-     HBaseFsck recheck = doFsck(conf, false);
-     assertNoErrors(recheck);
-     assertEquals(ROWKEYS.length, countRows());
-   } finally {
-     cleanupTable(tableName);
-   }
- }
-
- /**
-  * Builds a table, removes the hdfs data of region [B, C) while leaving its meta row,
-  * and checks that hbck reports NOT_IN_HDFS and repairs the table. Two rows are
-  * expected to be gone after the repair.
-  */
- @Test (timeout=180000)
- public void testNotInHdfs() throws Exception {
-   final TableName tableName = TableName.valueOf(name.getMethodName());
-   final byte[] regionStart = Bytes.toBytes("B");
-   final byte[] regionEnd = Bytes.toBytes("C");
-   final HBaseFsck.ErrorReporter.ERROR_CODE[] expectedErrors = {
-     HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_HDFS
-   };
-   try {
-     setupTable(tableName);
-     assertEquals(ROWKEYS.length, countRows());
-
-     // make sure data in regions, if in wal only there is no data loss
-     admin.flush(tableName);
-
-     // Leave a hole in the hdfs data; the second flag is false: don't rm meta
-     deleteRegion(conf, tbl.getTableDescriptor(), regionStart, regionEnd, false, false, true);
-
-     HBaseFsck report = doFsck(conf, false);
-     assertErrors(report, expectedErrors);
-     // holes are separate from overlap groups
-     assertEquals(0, report.getOverlapGroups(tableName).size());
-
-     // repairing run
-     doFsck(conf, true);
-
-     // a read-only run must now be clean; the rows of the deleted region are lost
-     HBaseFsck recheck = doFsck(conf, false);
-     assertNoErrors(recheck);
-     assertEquals(ROWKEYS.length - 2, countRows());
-   } finally {
-     cleanupTable(tableName);
-   }
- }
-
- /**
-  * This creates a table and simulates the race situation where a concurrent compaction or split
-  * has removed a colfam dir before the corruption checker got to it.
-  * <p>
-  * The fault is injected by an HFileCorruptionChecker subclass that deletes the first column
-  * family directory it is asked to check, right before delegating to the real check.
-  */
- // Disabled because fails sporadically. Is this test right? Timing-wise, there could be no
- // files in a column family on initial creation -- as suggested by Matteo.
- @Ignore
- @Test(timeout=180000)
- public void testQuarantineMissingFamdir() throws Exception {
-   final TableName tableName = TableName.valueOf(name.getMethodName());
-   // inject a fault in the hfcc created.
-   final FileSystem fs = FileSystem.get(conf);
-   HBaseFsck hbck = new HBaseFsck(conf, hbfsckExecutorService) {
-     @Override
-     public HFileCorruptionChecker createHFileCorruptionChecker(boolean sidelineCorruptHFiles)
-         throws IOException {
-       return new HFileCorruptionChecker(conf, executor, sidelineCorruptHFiles) {
-         // flips to true on the first checked directory so only one directory is deleted
-         AtomicBoolean attemptedFirstHFile = new AtomicBoolean(false);
-         @Override
-         protected void checkColFamDir(Path p) throws IOException {
-           if (attemptedFirstHFile.compareAndSet(false, true)) {
-             assertTrue(fs.delete(p, true)); // make sure delete happened.
-           }
-           super.checkColFamDir(p);
-         }
-       };
-     }
-   };
-   try {
-     doQuarantineTest(tableName, hbck, 3, 0, 0, 0, 1);
-   } finally {
-     // close hbck even when the quarantine assertions fail
-     hbck.close();
-   }
- }
-
- /**
-  * This creates a table and simulates the race situation where a concurrent compaction or split
-  * has removed a region dir before the corruption checker got to it.
-  * <p>
-  * The fault is injected by an HFileCorruptionChecker subclass that deletes the first region
-  * directory it is asked to check, right before delegating to the real check.
-  */
- @Test(timeout=180000)
- public void testQuarantineMissingRegionDir() throws Exception {
-   final TableName tableName = TableName.valueOf(name.getMethodName());
-   // inject a fault in the hfcc created.
-   final FileSystem fs = FileSystem.get(conf);
-   HBaseFsck hbck = new HBaseFsck(conf, hbfsckExecutorService) {
-     @Override
-     public HFileCorruptionChecker createHFileCorruptionChecker(boolean sidelineCorruptHFiles)
-         throws IOException {
-       return new HFileCorruptionChecker(conf, executor, sidelineCorruptHFiles) {
-         // flips to true on the first checked directory so only one directory is deleted
-         AtomicBoolean attemptedFirstHFile = new AtomicBoolean(false);
-         @Override
-         protected void checkRegionDir(Path p) throws IOException {
-           if (attemptedFirstHFile.compareAndSet(false, true)) {
-             assertTrue(fs.delete(p, true)); // make sure delete happened.
-           }
-           super.checkRegionDir(p);
-         }
-       };
-     }
-   };
-   try {
-     doQuarantineTest(tableName, hbck, 3, 0, 0, 0, 1);
-   } finally {
-     // close hbck even when the quarantine assertions fail
-     hbck.close();
-   }
- }
-}
http://git-wip-us.apache.org/repos/asf/hbase/blob/89e2869e/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsckReplicas.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsckReplicas.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsckReplicas.java
deleted file mode 100644
index 341bd34..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsckReplicas.java
+++ /dev/null
@@ -1,380 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.util;
-
-import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.assertErrors;
-import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.assertNoErrors;
-import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.doFsck;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertTrue;
-
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.EnumSet;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Map;
-import java.util.NavigableMap;
-import java.util.Set;
-import java.util.concurrent.ScheduledThreadPoolExecutor;
-import java.util.concurrent.SynchronousQueue;
-import java.util.concurrent.ThreadPoolExecutor;
-import java.util.concurrent.TimeUnit;
-
-import org.apache.hadoop.hbase.ClusterStatus.Option;
-import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.HRegionLocation;
-import org.apache.hadoop.hbase.MetaTableAccessor;
-import org.apache.hadoop.hbase.ServerName;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.client.ClusterConnection;
-import org.apache.hadoop.hbase.client.Delete;
-import org.apache.hadoop.hbase.client.HBaseAdmin;
-import org.apache.hadoop.hbase.client.Put;
-import org.apache.hadoop.hbase.client.RegionInfo;
-import org.apache.hadoop.hbase.client.RegionReplicaUtil;
-import org.apache.hadoop.hbase.client.Table;
-import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
-import org.apache.hadoop.hbase.master.RegionState;
-import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
-import org.apache.hadoop.hbase.testclassification.LargeTests;
-import org.apache.hadoop.hbase.testclassification.MiscTests;
-import org.junit.AfterClass;
-import org.junit.Before;
-import org.junit.BeforeClass;
-import org.junit.Ignore;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-import org.junit.rules.TestName;
-
-@Ignore
-@Category({MiscTests.class, LargeTests.class})
-public class TestHBaseFsckReplicas extends BaseTestHBaseFsck {
- @Rule
- public TestName name = new TestName();
-
- /**
-  * One-time fixture: configures the test cluster, starts it, creates the executor services
-  * used by the tests, captures the master's region states and the shared connection/admin,
-  * switches the balancer off and waits for the meta and namespace tables to be assigned.
-  */
- @BeforeClass
- public static void setUpBeforeClass() throws Exception {
-   // timeouts derived from REGION_ONLINE_TIMEOUT
-   final int hbckCloseTimeout = 2 * REGION_ONLINE_TIMEOUT;
-   final int rpcTimeout = 8 * REGION_ONLINE_TIMEOUT;
-   final int connectionThreadsMax = 2 * POOL_SIZE;
-
-   TEST_UTIL.getConfiguration().set(CoprocessorHost.MASTER_COPROCESSOR_CONF_KEY,
-       MasterSyncCoprocessor.class.getName());
-
-   // handler counts
-   conf.setInt("hbase.regionserver.handler.count", 2);
-   conf.setInt("hbase.regionserver.metahandler.count", 30);
-
-   // client thread pools and timeouts
-   conf.setInt("hbase.htable.threads.max", POOL_SIZE);
-   conf.setInt("hbase.hconnection.threads.max", connectionThreadsMax);
-   conf.setInt("hbase.hbck.close.timeout", hbckCloseTimeout);
-   conf.setInt(HConstants.HBASE_RPC_TIMEOUT_KEY, rpcTimeout);
-   TEST_UTIL.startMiniCluster(3);
-
-   tableExecutorService = new ThreadPoolExecutor(1, POOL_SIZE, 60, TimeUnit.SECONDS,
-       new SynchronousQueue<>(), Threads.newDaemonThreadFactory("testhbck"));
-
-   hbfsckExecutorService = new ScheduledThreadPoolExecutor(POOL_SIZE);
-
-   regionStates =
-       TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager().getRegionStates();
-
-   connection = (ClusterConnection) TEST_UTIL.getConnection();
-
-   admin = connection.getAdmin();
-   admin.setBalancerRunning(false, true);
-
-   TEST_UTIL.waitUntilAllRegionsAssigned(TableName.META_TABLE_NAME);
-   TEST_UTIL.waitUntilAllRegionsAssigned(TableName.NAMESPACE_TABLE_NAME);
- }
-
- // One-time teardown: shuts down both executor services, closes the shared admin and
- // finally shuts down the mini cluster.
- @AfterClass
- public static void tearDownAfterClass() throws Exception {
- tableExecutorService.shutdown();
- hbfsckExecutorService.shutdown();
- admin.close();
- TEST_UTIL.shutdownMiniCluster();
- }
-
- // Runs before every test: resets EnvironmentEdgeManager.
- // NOTE(review): presumably this undoes a custom clock edge injected by an earlier test -- confirm.
- @Before
- public void setUp() {
- EnvironmentEdgeManager.reset();
- }
-
- /**
-  * This creates a table with region_replica > 1, does a split, and checks
-  * that hbck will not report the split replica parent as a lingering split parent.
-  * <p>
-  * The catalog janitor is switched off for the duration of the test and is always
-  * switched back on afterwards, even when the table cleanup fails.
-  */
- @Test(timeout=180000)
- public void testHbckReportReplicaLingeringSplitParent() throws Exception {
-   TableName table = TableName.valueOf("testHbckReportReplicaLingeringSplitParent");
-
-   try {
-     setupTableWithRegionReplica(table, 2);
-     admin.flush(table);
-
-     // disable catalog janitor
-     admin.enableCatalogJanitor(false);
-     admin.split(table, Bytes.toBytes("A1"));
-
-     // NOTE(review): fixed pause, presumably to let the split finish -- confirm it is enough
-     Thread.sleep(1000);
-     // run hbck after the split to make sure we don't see any errors
-     assertNoErrors(doFsck(conf, false));
-   } finally {
-     try {
-       cleanupTable(table);
-     } finally {
-       // enable catalog janitor
-       admin.enableCatalogJanitor(true);
-     }
-   }
- }
-
- /*
- * This creates a table with region_replica > 1 and verifies hbck runs
- * successfully
- */
- @Test(timeout=180000)
- public void testHbckWithRegionReplica() throws Exception {
- final TableName tableName = TableName.valueOf(name.getMethodName());
- try {
- setupTableWithRegionReplica(tableName, 2);
- admin.flush(tableName);
- assertNoErrors(doFsck(conf, false));
- } finally {
- cleanupTable(tableName);
- }
- }
-
- @Test (timeout=180000)
- public void testHbckWithFewerReplica() throws Exception {
- final TableName tableName = TableName.valueOf(name.getMethodName());
- try {
- setupTableWithRegionReplica(tableName, 2);
- admin.flush(tableName);
- assertNoErrors(doFsck(conf, false));
- assertEquals(ROWKEYS.length, countRows());
- deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"), Bytes.toBytes("C"), true,
- false, false, false, 1); // unassign one replica
- // check that problem exists
- HBaseFsck hbck = doFsck(conf, false);
- assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.NOT_DEPLOYED });
- // fix the problem
- hbck = doFsck(conf, true);
- // run hbck again to make sure we don't see any errors
- hbck = doFsck(conf, false);
- assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {});
- } finally {
- cleanupTable(tableName);
- }
- }
-
- @Test (timeout=180000)
- public void testHbckWithExcessReplica() throws Exception {
- final TableName tableName = TableName.valueOf(name.getMethodName());
- try {
- setupTableWithRegionReplica(tableName, 2);
- admin.flush(tableName);
- assertNoErrors(doFsck(conf, false));
- assertEquals(ROWKEYS.length, countRows());
- // the next few lines inject a location in meta for a replica, and then
- // asks the master to assign the replica (the meta needs to be injected
- // for the master to treat the request for assignment as valid; the master
- // checks the region is valid either from its memory or meta)
- Table meta = connection.getTable(TableName.META_TABLE_NAME, tableExecutorService);
- List<RegionInfo> regions = admin.getRegions(tableName);
- byte[] startKey = Bytes.toBytes("B");
- byte[] endKey = Bytes.toBytes("C");
- byte[] metaKey = null;
- RegionInfo newHri = null;
- for (RegionInfo h : regions) {
- if (Bytes.compareTo(h.getStartKey(), startKey) == 0 &&
- Bytes.compareTo(h.getEndKey(), endKey) == 0 &&
- h.getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID) {
- metaKey = h.getRegionName();
- //create a hri with replicaId as 2 (since we already have replicas with replicaid 0 and 1)
- newHri = RegionReplicaUtil.getRegionInfoForReplica(h, 2);
- break;
- }
- }
- Put put = new Put(metaKey);
- Collection<ServerName> var = admin.getClusterStatus(EnumSet.of(Option.LIVE_SERVERS)).getServers();
- ServerName sn = var.toArray(new ServerName[var.size()])[0];
- //add a location with replicaId as 2 (since we already have replicas with replicaid 0 and 1)
- MetaTableAccessor.addLocation(put, sn, sn.getStartcode(), -1, 2);
- meta.put(put);
- // assign the new replica
- HBaseFsckRepair.fixUnassigned(admin, newHri);
- HBaseFsckRepair.waitUntilAssigned(admin, newHri);
- // now reset the meta row to its original value
- Delete delete = new Delete(metaKey);
- delete.addColumns(HConstants.CATALOG_FAMILY, MetaTableAccessor.getServerColumn(2));
- delete.addColumns(HConstants.CATALOG_FAMILY, MetaTableAccessor.getStartCodeColumn(2));
- delete.addColumns(HConstants.CATALOG_FAMILY, MetaTableAccessor.getSeqNumColumn(2));
- meta.delete(delete);
- meta.close();
- // check that problem exists
- HBaseFsck hbck = doFsck(conf, false);
- assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[]{HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META});
- // fix the problem
- hbck = doFsck(conf, true);
- // run hbck again to make sure we don't see any errors
- hbck = doFsck(conf, false);
- assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[]{});
- } finally {
- cleanupTable(tableName);
- }
- }
-
- /**
-  * This creates and fixes a bad table with a region that is in meta but has
-  * no deployment or data hdfs. The table has region_replication set to 2.
-  * <p>
-  * After the repair the test also checks that the regions hbck created for the damaged
-  * key range are online and that the original ones are no longer online.
-  * <p>
-  * The shared {@code admin} is deliberately not closed here: it is created once in
-  * {@code setUpBeforeClass()}, used by every test, and closed in {@code tearDownAfterClass()}.
-  */
- @Test (timeout=180000)
- public void testNotInHdfsWithReplicas() throws Exception {
-   final TableName tableName = TableName.valueOf(name.getMethodName());
-   try {
-     RegionInfo[] oldHris = new RegionInfo[2];
-     setupTableWithRegionReplica(tableName, 2);
-     assertEquals(ROWKEYS.length, countRows());
-     NavigableMap<RegionInfo, ServerName> map =
-         MetaTableAccessor.allTableRegions(TEST_UTIL.getConnection(),
-             tbl.getName());
-     int i = 0;
-     // store the HRIs of the regions we will mess up
-     for (Map.Entry<RegionInfo, ServerName> m : map.entrySet()) {
-       if (m.getKey().getStartKey().length > 0 &&
-           m.getKey().getStartKey()[0] == Bytes.toBytes("B")[0]) {
-         LOG.debug("Initially server hosting " + m.getKey() + " is " + m.getValue());
-         oldHris[i++] = m.getKey();
-       }
-     }
-     // make sure data in regions
-     admin.flush(tableName);
-
-     // Mess it up by leaving a hole in the hdfs data
-     deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"), Bytes.toBytes("C"), false,
-         false, true); // don't rm meta
-
-     HBaseFsck hbck = doFsck(conf, false);
-     assertErrors(hbck,
-         new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_HDFS });
-
-     // fix hole
-     doFsck(conf, true);
-
-     // check that hole fixed
-     assertNoErrors(doFsck(conf, false));
-     assertEquals(ROWKEYS.length - 2, countRows());
-
-     // the following code checks whether the old primary/secondary has
-     // been unassigned and the new primary/secondary has been assigned
-     i = 0;
-     RegionInfo[] newHris = new RegionInfo[2];
-     // get all table's regions from meta
-     map = MetaTableAccessor.allTableRegions(TEST_UTIL.getConnection(), tbl.getName());
-     // get the HRIs of the new regions (hbck created new regions for fixing the hdfs mess-up)
-     for (Map.Entry<RegionInfo, ServerName> m : map.entrySet()) {
-       if (m.getKey().getStartKey().length > 0 &&
-           m.getKey().getStartKey()[0] == Bytes.toBytes("B")[0]) {
-         newHris[i++] = m.getKey();
-       }
-     }
-     // get all the online regions in the regionservers
-     Collection<ServerName> servers =
-         admin.getClusterStatus(EnumSet.of(Option.LIVE_SERVERS)).getServers();
-     Set<RegionInfo> onlineRegions = new HashSet<>();
-     for (ServerName s : servers) {
-       List<RegionInfo> list = admin.getRegions(s);
-       onlineRegions.addAll(list);
-     }
-     // the new HRIs must be a subset of the online regions
-     assertTrue(onlineRegions.containsAll(Arrays.asList(newHris)));
-     // the old HRIs must not be part of the set (removeAll would return false if
-     // the set didn't change)
-     assertFalse(onlineRegions.removeAll(Arrays.asList(oldHris)));
-   } finally {
-     // only the table is cleaned up; the class-wide admin must stay open for other tests
-     cleanupTable(tableName);
-   }
- }
-
- /**
-  * Creates and fixes a bad table with a successful split that have a deployed
-  * start and end keys and region replicas enabled.
-  * <p>
-  * The split parent is re-assigned and re-added to meta after the split so that it
-  * duplicates the key range of its daughters; hbck must report the duplicate and then
-  * repair it. The catalog janitor is switched off for the test and always switched
-  * back on afterwards.
-  */
- @Test (timeout=180000)
- public void testSplitAndDupeRegionWithRegionReplica() throws Exception {
-   TableName table =
-       TableName.valueOf("testSplitAndDupeRegionWithRegionReplica");
-
-   try {
-     setupTableWithRegionReplica(table, 2);
-
-     assertNoErrors(doFsck(conf, false));
-     assertEquals(ROWKEYS.length, countRows());
-
-     // No Catalog Janitor running
-     admin.enableCatalogJanitor(false);
-     HRegionLocation loc = connection.getRegionLocation(table, SPLITS[0], false);
-     RegionInfo hriParent = loc.getRegionInfo();
-
-     // Split Region A just before B; use the shared admin rather than opening a new one
-     admin.split(table, Bytes.toBytes("A@"));
-     Thread.sleep(1000);
-
-     // We need to make sure the parent region is not in a split state, so we put it in CLOSED state.
-     regionStates.updateRegionState(hriParent, RegionState.State.CLOSED);
-     TEST_UTIL.assignRegion(hriParent);
-     // the meta table is only needed for this one write, so it is closed right away
-     try (Table meta = connection.getTable(TableName.META_TABLE_NAME, tableExecutorService)) {
-       MetaTableAccessor.addRegionToMeta(meta, hriParent);
-     }
-     ServerName server = regionStates.getRegionServerOfRegion(hriParent);
-
-     if (server != null) {
-       TEST_UTIL.assertRegionOnServer(hriParent, server, REGION_ONLINE_TIMEOUT);
-     }
-
-     // poll until the parent shows up among the deployed regions
-     while (findDeployedHSI(getDeployedHRIs((HBaseAdmin) admin), hriParent) == null) {
-       Thread.sleep(250);
-     }
-
-     LOG.debug("Finished assignment of parent region");
-
-     // TODO why is dupe region different from dupe start keys?
-     HBaseFsck hbck = doFsck(conf, false);
-     assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
-         HBaseFsck.ErrorReporter.ERROR_CODE.NOT_DEPLOYED,
-         HBaseFsck.ErrorReporter.ERROR_CODE.DUPE_STARTKEYS,
-         HBaseFsck.ErrorReporter.ERROR_CODE.DUPE_STARTKEYS,
-         HBaseFsck.ErrorReporter.ERROR_CODE.OVERLAP_IN_REGION_CHAIN});
-     assertEquals(3, hbck.getOverlapGroups(table).size());
-
-     // fix the degenerate region.
-     hbck = new HBaseFsck(conf, hbfsckExecutorService);
-     hbck.setDisplayFullReport(); // i.e. -details
-     hbck.setTimeLag(0);
-     hbck.setFixHdfsOverlaps(true);
-     hbck.setRemoveParents(true);
-     hbck.setFixReferenceFiles(true);
-     hbck.setFixHFileLinks(true);
-     hbck.connect();
-     hbck.onlineHbck();
-     hbck.close();
-
-     hbck = doFsck(conf, false);
-
-     assertNoErrors(hbck);
-     assertEquals(0, hbck.getOverlapGroups(table).size());
-     assertEquals(ROWKEYS.length, countRows());
-   } finally {
-     try {
-       cleanupTable(table);
-     } finally {
-       // restore the catalog janitor so later tests run with it enabled
-       admin.enableCatalogJanitor(true);
-     }
-   }
- }
-}