You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by st...@apache.org on 2011/01/10 23:37:08 UTC
svn commit: r1057404 -
/hbase/branches/0.90/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java
Author: stack
Date: Mon Jan 10 22:37:08 2011
New Revision: 1057404
URL: http://svn.apache.org/viewvc?rev=1057404&view=rev
Log:
HBASE-3403 Region orphaned after failure during split -- forgot to add Test
Added:
hbase/branches/0.90/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java
Added: hbase/branches/0.90/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.90/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java?rev=1057404&view=auto
==============================================================================
--- hbase/branches/0.90/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java (added)
+++ hbase/branches/0.90/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java Mon Jan 10 22:37:08 2011
@@ -0,0 +1,290 @@
+/**
+ * Copyright 2010 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.regionserver;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotSame;
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.MasterNotRunningException;
+import org.apache.hadoop.hbase.MiniHBaseCluster;
+import org.apache.hadoop.hbase.UnknownRegionException;
+import org.apache.hadoop.hbase.ZooKeeperConnectionException;
+import org.apache.hadoop.hbase.client.Delete;
+import org.apache.hadoop.hbase.client.HBaseAdmin;
+import org.apache.hadoop.hbase.client.HTable;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
+import org.junit.AfterClass;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+/**
+ * Like {@link TestSplitTransaction} in that we're testing {@link SplitTransaction}
+ * only the below tests are against a running cluster where {@link TestSplitTransaction}
+ * is tests against a bare {@link HRegion}.
+ */
+public class TestSplitTransactionOnCluster {
+ private static final Log LOG =
+ LogFactory.getLog(TestSplitTransactionOnCluster.class);
+ private HBaseAdmin admin = null;
+ private MiniHBaseCluster cluster = null;
+
+ private static final HBaseTestingUtility TESTING_UTIL =
+ new HBaseTestingUtility();
+
+ @BeforeClass public static void before() throws Exception {
+ TESTING_UTIL.startMiniCluster(2);
+ }
+
+ @AfterClass public static void after() throws Exception {
+ TESTING_UTIL.shutdownMiniCluster();
+ }
+
+ @Before public void setup() throws IOException {
+ TESTING_UTIL.ensureSomeRegionServersAvailable(2);
+ this.admin = new HBaseAdmin(TESTING_UTIL.getConfiguration());
+ this.cluster = TESTING_UTIL.getMiniHBaseCluster();
+ }
+
+ /**
+ * Messy test that simulates case where SplitTransactions fails to add one
+ * of the daughters up into the .META. table before crash. We're testing
+ * fact that the shutdown handler will fixup the missing daughter region
+ * adding it back into .META.
+ * @throws IOException
+ * @throws InterruptedException
+ */
+ @Test (timeout = 600000) public void testShutdownSimpleFixup()
+ throws IOException, InterruptedException {
+ final byte [] tableName = Bytes.toBytes("testShutdownSimpleFixup");
+
+ // Create table then get the single region for our new table.
+ HTable t = TESTING_UTIL.createTable(tableName, HConstants.CATALOG_FAMILY);
+
+ List<HRegion> regions = cluster.getRegions(tableName);
+ assertEquals(1, regions.size());
+ HRegionInfo hri = regions.get(0).getRegionInfo();
+
+ int tableRegionIndex = ensureTableRegionNotOnSameServerAsMeta(admin, hri);
+
+ // Turn off balancer so it doesn't cut in and mess up our placements.
+ this.admin.balanceSwitch(false);
+ // Turn off the meta scanner so it don't remove parent on us.
+ cluster.getMaster().setCatalogJanitorEnabled(false);
+ try {
+ // Add a bit of load up into the table so splittable.
+ TESTING_UTIL.loadTable(t, HConstants.CATALOG_FAMILY);
+ // Get region pre-split.
+ HRegionServer server = cluster.getRegionServer(tableRegionIndex);
+ printOutRegions(server, "Initial regions: ");
+ int regionCount = server.getOnlineRegions().size();
+ // Now split.
+ split(hri, server, regionCount);
+ // Get daughters
+ List<HRegion> daughters = cluster.getRegions(tableName);
+ assertTrue(daughters.size() >= 2);
+ // Remove one of the daughters from .META. to simulate failed insert of
+ // daughter region up into .META.
+ removeDaughterFromMeta(daughters.get(0).getRegionName());
+ // Now crash the server
+ cluster.abortRegionServer(tableRegionIndex);
+ while(server.getOnlineRegions().size() > 0) {
+ LOG.info("Waiting on server to go down");
+ Thread.sleep(100);
+ }
+ // Wait till regions are back on line again.
+ while(cluster.getRegions(tableName).size() < daughters.size()) {
+ LOG.info("Waiting for repair to happen");
+ Thread.sleep(1000);
+ }
+ // Assert daughters are online.
+ regions = cluster.getRegions(tableName);
+ for (HRegion r: regions) {
+ assertTrue(daughters.contains(r));
+ }
+ } finally {
+ admin.balanceSwitch(true);
+ cluster.getMaster().setCatalogJanitorEnabled(true);
+ }
+ }
+
+ /**
+ * Test that if daughter split on us, we won't do the shutdown handler fixup
+ * just because we can't find the immediate daughter of an offlined parent.
+ * @throws IOException
+ * @throws InterruptedException
+ */
+ @Test public void testShutdownFixupWhenDaughterHasSplit()
+ throws IOException, InterruptedException {
+ final byte [] tableName =
+ Bytes.toBytes("testShutdownFixupWhenDaughterHasSplit");
+
+ // Create table then get the single region for our new table.
+ HTable t = TESTING_UTIL.createTable(tableName, HConstants.CATALOG_FAMILY);
+
+ List<HRegion> regions = cluster.getRegions(tableName);
+ assertEquals(1, regions.size());
+ HRegionInfo hri = regions.get(0).getRegionInfo();
+
+ int tableRegionIndex = ensureTableRegionNotOnSameServerAsMeta(admin, hri);
+
+ // Turn off balancer so it doesn't cut in and mess up our placements.
+ this.admin.balanceSwitch(false);
+ // Turn off the meta scanner so it don't remove parent on us.
+ cluster.getMaster().setCatalogJanitorEnabled(false);
+ try {
+ // Add a bit of load up into the table so splittable.
+ TESTING_UTIL.loadTable(t, HConstants.CATALOG_FAMILY);
+ // Get region pre-split.
+ HRegionServer server = cluster.getRegionServer(tableRegionIndex);
+ printOutRegions(server, "Initial regions: ");
+ int regionCount = server.getOnlineRegions().size();
+ // Now split.
+ split(hri, server, regionCount);
+ // Get daughters
+ List<HRegion> daughters = cluster.getRegions(tableName);
+ assertTrue(daughters.size() >= 2);
+ // Now split one of the daughters.
+ regionCount = server.getOnlineRegions().size();
+ split(daughters.get(0).getRegionInfo(), server, regionCount);
+ // Get list of daughters
+ daughters = cluster.getRegions(tableName);
+ // Now crash the server
+ cluster.abortRegionServer(tableRegionIndex);
+ while(server.getOnlineRegions().size() > 0) {
+ LOG.info("Waiting on server to go down");
+ Thread.sleep(100);
+ }
+ // Wait till regions are back on line again.
+ while(cluster.getRegions(tableName).size() < daughters.size()) {
+ LOG.info("Waiting for repair to happen");
+ Thread.sleep(1000);
+ }
+ // Assert daughters are online and ONLY the original daughters -- that
+ // fixup didn't insert one during server shutdown recover.
+ regions = cluster.getRegions(tableName);
+ assertEquals(daughters.size(), regions.size());
+ for (HRegion r: regions) {
+ assertTrue(daughters.contains(r));
+ }
+ } finally {
+ admin.balanceSwitch(true);
+ cluster.getMaster().setCatalogJanitorEnabled(true);
+ }
+ }
+
+ private void split(final HRegionInfo hri, final HRegionServer server,
+ final int regionCount)
+ throws IOException, InterruptedException {
+ this.admin.split(hri.getRegionNameAsString());
+ while(server.getOnlineRegions().size() <= regionCount) {
+ LOG.debug("Waiting on region to split");
+ Thread.sleep(100);
+ }
+ }
+
+ private void removeDaughterFromMeta(final byte [] regionName) throws IOException {
+ HTable metaTable =
+ new HTable(TESTING_UTIL.getConfiguration(), HConstants.META_TABLE_NAME);
+ Delete d = new Delete(regionName);
+ LOG.info("Deleted " + Bytes.toString(regionName));
+ metaTable.delete(d);
+ }
+
+ /**
+ * Ensure single table region is not on same server as the single .META. table
+ * region.
+ * @param admin
+ * @param hri
+ * @return Index of the server hosting the single table region
+ * @throws UnknownRegionException
+ * @throws MasterNotRunningException
+ * @throws ZooKeeperConnectionException
+ * @throws InterruptedException
+ */
+ private int ensureTableRegionNotOnSameServerAsMeta(final HBaseAdmin admin,
+ final HRegionInfo hri)
+ throws UnknownRegionException, MasterNotRunningException,
+ ZooKeeperConnectionException, InterruptedException {
+ MiniHBaseCluster cluster = TESTING_UTIL.getMiniHBaseCluster();
+ // Now make sure that the table region is not on same server as that hosting
+ // .META. We don't want .META. replay polluting our test when we later crash
+ // the table region serving server.
+ int metaServerIndex = cluster.getServerWithMeta();
+ assertTrue(metaServerIndex != -1);
+ HRegionServer metaRegionServer = cluster.getRegionServer(metaServerIndex);
+ int tableRegionIndex = cluster.getServerWith(hri.getRegionName());
+ assertTrue(tableRegionIndex != -1);
+ HRegionServer tableRegionServer = cluster.getRegionServer(tableRegionIndex);
+ if (metaRegionServer.getServerName().equals(tableRegionServer.getServerName())) {
+ HRegionServer hrs = getOtherRegionServer(cluster, metaRegionServer);
+ admin.move(hri.getEncodedNameAsBytes(), Bytes.toBytes(hrs.getServerName()));
+ }
+ // Wait till table region is up on the server that is NOT carrying .META..
+ while (true) {
+ tableRegionIndex = cluster.getServerWith(hri.getRegionName());
+ if (tableRegionIndex != -1 && tableRegionIndex != metaServerIndex) break;
+ LOG.debug("Waiting on region move off the .META. server; current index " +
+ tableRegionIndex);
+ Thread.sleep(100);
+ }
+ // Verify for sure table region is not on same server as .META.
+ tableRegionIndex = cluster.getServerWith(hri.getRegionName());
+ assertTrue(tableRegionIndex != -1);
+ assertNotSame(metaServerIndex, tableRegionIndex);
+ return tableRegionIndex;
+ }
+
+ /**
+ * Find regionserver other than the one passed.
+ * Can't rely on indexes into list of regionservers since crashed servers
+ * occupy an index.
+ * @param cluster
+ * @param notThisOne
+ * @return A regionserver that is not <code>notThisOne</code> or null if none
+ * found
+ */
+ private HRegionServer getOtherRegionServer(final MiniHBaseCluster cluster,
+ final HRegionServer notThisOne) {
+ for (RegionServerThread rst: cluster.getRegionServerThreads()) {
+ HRegionServer hrs = rst.getRegionServer();
+ if (hrs.getServerName().equals(notThisOne.getServerName())) continue;
+ return hrs;
+ }
+ return null;
+ }
+
+ private void printOutRegions(final HRegionServer hrs, final String prefix) {
+ List<HRegionInfo> regions = hrs.getOnlineRegions();
+ for (HRegionInfo region: regions) {
+ LOG.info(prefix + region.getRegionNameAsString());
+ }
+ }
+}
\ No newline at end of file