You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by jx...@apache.org on 2013/10/11 23:44:45 UTC
svn commit: r1531434 [2/2] - in /hbase/branches/0.96:
hbase-client/src/main/java/org/apache/hadoop/hbase/executor/
hbase-client/src/main/java/org/apache/hadoop/hbase/master/
hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/
hbase-protocol/s...
Modified: hbase/branches/0.96/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/SplitTransaction.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.96/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/SplitTransaction.java?rev=1531434&r1=1531433&r2=1531434&view=diff
==============================================================================
--- hbase/branches/0.96/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/SplitTransaction.java (original)
+++ hbase/branches/0.96/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/SplitTransaction.java Fri Oct 11 21:44:44 2013
@@ -18,6 +18,10 @@
*/
package org.apache.hadoop.hbase.regionserver;
+import static org.apache.hadoop.hbase.executor.EventType.RS_ZK_REQUEST_REGION_SPLIT;
+import static org.apache.hadoop.hbase.executor.EventType.RS_ZK_REGION_SPLIT;
+import static org.apache.hadoop.hbase.executor.EventType.RS_ZK_REGION_SPLITTING;
+
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
@@ -50,6 +54,7 @@ import org.apache.hadoop.hbase.zookeeper
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.KeeperException.NodeExistsException;
+import org.apache.zookeeper.data.Stat;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
@@ -234,27 +239,18 @@ public class SplitTransaction {
if (server != null && server.getZooKeeper() != null) {
try {
createNodeSplitting(server.getZooKeeper(),
- this.parent.getRegionInfo(), server.getServerName());
+ parent.getRegionInfo(), server.getServerName(), hri_a, hri_b);
} catch (KeeperException e) {
- throw new IOException("Failed creating SPLITTING znode on " +
+ throw new IOException("Failed creating PENDING_SPLIT znode on " +
this.parent.getRegionNameAsString(), e);
}
}
this.journal.add(JournalEntry.SET_SPLITTING_IN_ZK);
if (server != null && server.getZooKeeper() != null) {
- try {
- // Transition node from SPLITTING to SPLITTING after creating the split node.
- // Master will get the callback for node change only if the transition is successful.
- // Note that if the transition fails then the rollback will delete the created znode
- // as the journal entry SET_SPLITTING_IN_ZK is added.
- // TODO : May be we can add some new state to znode and handle the new state incase
- // of success/failure
- this.znodeVersion = transitionNodeSplitting(server.getZooKeeper(),
- this.parent.getRegionInfo(), server.getServerName(), -1);
- } catch (KeeperException e) {
- throw new IOException("Failed setting SPLITTING znode on "
- + this.parent.getRegionNameAsString(), e);
- }
+ // After creating the split node, wait for master to transition it
+ // from PENDING_SPLIT to SPLITTING so that we can move on. We want the master
+ // to know about it so that it won't transition any region which is splitting.
+ znodeVersion = getZKNode(server, services);
}
this.parent.getRegionFileSystem().createSplitsDir();
@@ -409,9 +405,10 @@ public class SplitTransaction {
// Tell master about split by updating zk. If we fail, abort.
if (server != null && server.getZooKeeper() != null) {
try {
- this.znodeVersion = transitionNodeSplit(server.getZooKeeper(),
+ this.znodeVersion = transitionSplittingNode(server.getZooKeeper(),
parent.getRegionInfo(), a.getRegionInfo(), b.getRegionInfo(),
- server.getServerName(), this.znodeVersion);
+ server.getServerName(), this.znodeVersion,
+ RS_ZK_REGION_SPLITTING, RS_ZK_REGION_SPLIT);
int spins = 0;
// Now wait for the master to process the split. We know it's done
@@ -424,9 +421,10 @@ public class SplitTransaction {
}
Thread.sleep(100);
// When this returns -1 it means the znode doesn't exist
- this.znodeVersion = tickleNodeSplit(server.getZooKeeper(),
+ this.znodeVersion = transitionSplittingNode(server.getZooKeeper(),
parent.getRegionInfo(), a.getRegionInfo(), b.getRegionInfo(),
- server.getServerName(), this.znodeVersion);
+ server.getServerName(), this.znodeVersion,
+ RS_ZK_REGION_SPLIT, RS_ZK_REGION_SPLIT);
spins++;
} while (this.znodeVersion != -1 && !server.isStopped()
&& !services.isStopping());
@@ -449,6 +447,76 @@ public class SplitTransaction {
}
/**
+ * Wait for the splitting node to be transitioned from pending_split
+ * to splitting by master. That's how we are sure master has processed
+ * the event and is good with us to move on. If we don't get any update,
+ * we periodically transition the node so that master gets the callback.
+ * If the node is removed, or has moved to a state other than pending_split
+ * or splitting, we abort the split.
+ */
+ private int getZKNode(final Server server,
+ final RegionServerServices services) throws IOException {
+ // Wait for the master to process the pending_split.
+ try {
+ int spins = 0;
+ Stat stat = new Stat();
+ ZooKeeperWatcher zkw = server.getZooKeeper();
+ ServerName expectedServer = server.getServerName();
+ String node = parent.getRegionInfo().getEncodedName();
+ while (!(server.isStopped() || services.isStopping())) {
+ if (spins % 5 == 0) {
+ LOG.debug("Still waiting for master to process "
+ + "the pending_split for " + node);
+ transitionSplittingNode(zkw, parent.getRegionInfo(),
+ hri_a, hri_b, expectedServer, -1, RS_ZK_REQUEST_REGION_SPLIT,
+ RS_ZK_REQUEST_REGION_SPLIT);
+ }
+ Thread.sleep(100);
+ spins++;
+ byte [] data = ZKAssign.getDataNoWatch(zkw, node, stat);
+ if (data == null) {
+ throw new IOException("Data is null, splitting node "
+ + node + " no longer exists");
+ }
+ RegionTransition rt = RegionTransition.parseFrom(data);
+ EventType et = rt.getEventType();
+ if (et == RS_ZK_REGION_SPLITTING) {
+ ServerName serverName = rt.getServerName();
+ if (!serverName.equals(expectedServer)) {
+ throw new IOException("Splitting node " + node + " is for "
+ + serverName + ", not us " + expectedServer);
+ }
+ byte [] payloadOfSplitting = rt.getPayload();
+ List<HRegionInfo> splittingRegions = HRegionInfo.parseDelimitedFrom(
+ payloadOfSplitting, 0, payloadOfSplitting.length);
+ assert splittingRegions.size() == 2;
+ HRegionInfo a = splittingRegions.get(0);
+ HRegionInfo b = splittingRegions.get(1);
+ if (!(hri_a.equals(a) && hri_b.equals(b))) {
+ throw new IOException("Splitting node " + node + " is for " + a + ", "
+ + b + ", not expected daughters: " + hri_a + ", " + hri_b);
+ }
+ // Master has processed it.
+ return stat.getVersion();
+ }
+ if (et != RS_ZK_REQUEST_REGION_SPLIT) {
+ throw new IOException("Splitting node " + node
+ + " moved out of splitting to " + et);
+ }
+ }
+ // Server is stopping/stopped
+ throw new IOException("Server is "
+ + (services.isStopping() ? "stopping" : "stopped"));
+ } catch (Exception e) {
+ if (e instanceof InterruptedException) {
+ Thread.currentThread().interrupt();
+ }
+ throw new IOException("Failed getting SPLITTING znode on "
+ + parent.getRegionNameAsString(), e);
+ }
+ }
+
+ /**
* Run the transaction.
* @param server Hosting server instance. Can be null when testing (won't try
* and update in zk if a null server)
@@ -641,6 +709,7 @@ public class SplitTransaction {
* @return True if we successfully rolled back, false if we got to the point
* of no return and so now need to abort the server to minimize damage.
*/
+ @SuppressWarnings("deprecation")
public boolean rollback(final Server server, final RegionServerServices services)
throws IOException {
// Coprocessor callback
@@ -723,15 +792,20 @@ public class SplitTransaction {
private static void cleanZK(final Server server, final HRegionInfo hri) {
try {
// Only delete if its in expected state; could have been hijacked.
- ZKAssign.deleteNode(server.getZooKeeper(), hri.getEncodedName(),
- EventType.RS_ZK_REGION_SPLITTING);
+ if (!ZKAssign.deleteNode(server.getZooKeeper(), hri.getEncodedName(),
+ RS_ZK_REQUEST_REGION_SPLIT)) {
+ ZKAssign.deleteNode(server.getZooKeeper(), hri.getEncodedName(),
+ RS_ZK_REGION_SPLITTING);
+ }
+ } catch (KeeperException.NoNodeException e) {
+ LOG.warn("Failed cleanup zk node of " + hri.getRegionNameAsString(), e);
} catch (KeeperException e) {
server.abort("Failed cleanup of " + hri.getRegionNameAsString(), e);
}
}
/**
- * Creates a new ephemeral node in the SPLITTING state for the specified region.
+ * Creates a new ephemeral node in the PENDING_SPLIT state for the specified region.
* Create it ephemeral in case regionserver dies mid-split.
*
* <p>Does not transition nodes from other states. If a node already exists
@@ -740,91 +814,63 @@ public class SplitTransaction {
* @param zkw zk reference
* @param region region to be created as offline
* @param serverName server event originates from
- * @return Version of znode created.
* @throws KeeperException
* @throws IOException
*/
- int createNodeSplitting(final ZooKeeperWatcher zkw, final HRegionInfo region,
- final ServerName serverName) throws KeeperException, IOException {
+ public static void createNodeSplitting(final ZooKeeperWatcher zkw, final HRegionInfo region,
+ final ServerName serverName, final HRegionInfo a,
+ final HRegionInfo b) throws KeeperException, IOException {
LOG.debug(zkw.prefix("Creating ephemeral node for " +
- region.getEncodedName() + " in SPLITTING state"));
- RegionTransition rt = RegionTransition.createRegionTransition(EventType.RS_ZK_REGION_SPLITTING,
- region.getRegionName(), serverName);
+ region.getEncodedName() + " in PENDING_SPLIT state"));
+ byte [] payload = HRegionInfo.toDelimitedByteArray(a, b);
+ RegionTransition rt = RegionTransition.createRegionTransition(
+ RS_ZK_REQUEST_REGION_SPLIT, region.getRegionName(), serverName, payload);
String node = ZKAssign.getNodeName(zkw, region.getEncodedName());
if (!ZKUtil.createEphemeralNodeAndWatch(zkw, node, rt.toByteArray())) {
throw new IOException("Failed create of ephemeral " + node);
}
- // Transition node from SPLITTING to SPLITTING and pick up version so we
- // can be sure this znode is ours; version is needed deleting.
- return transitionNodeSplitting(zkw, region, serverName, -1);
}
/**
- * Transitions an existing node for the specified region which is
- * currently in the SPLITTING state to be in the SPLIT state. Converts the
- * ephemeral SPLITTING znode to an ephemeral SPLIT node. Master cleans up
- * SPLIT znode when it reads it (or if we crash, zk will clean it up).
+ * Transitions an existing ephemeral node for the specified region which is
+ * currently in the begin state to be in the end state. Master cleans up the
+ * final SPLIT znode when it reads it (or if we crash, zk will clean it up).
*
- * <p>Does not transition nodes from other states. If for some reason the
- * node could not be transitioned, the method returns -1. If the transition
+ * <p>Does not transition nodes from other states. If for some reason the
+ * node could not be transitioned, the method returns -1. If the transition
* is successful, the version of the node after transition is returned.
*
* <p>This method can fail and return false for three different reasons:
* <ul><li>Node for this region does not exist</li>
- * <li>Node for this region is not in SPLITTING state</li>
- * <li>After verifying SPLITTING state, update fails because of wrong version
+ * <li>Node for this region is not in the begin state</li>
+ * <li>After verifying the begin state, update fails because of wrong version
* (this should never actually happen since an RS only does this transition
- * following a transition to SPLITTING. if two RS are conflicting, one would
- * fail the original transition to SPLITTING and not this transition)</li>
+ * following a transition to the begin state. If two RS are conflicting, one would
+ * fail the original transition to the begin state and not this transition)</li>
* </ul>
*
* <p>Does not set any watches.
*
- * <p>This method should only be used by a RegionServer when completing the
- * open of a region.
+ * <p>This method should only be used by a RegionServer when splitting a region.
*
* @param zkw zk reference
* @param parent region to be transitioned to opened
* @param a Daughter a of split
* @param b Daughter b of split
* @param serverName server event originates from
+ * @param znodeVersion expected version of data before modification
+ * @param beginState the state the znode is expected to be in currently
+ * @param endState the state to transition the znode to
* @return version of node after transition, -1 if unsuccessful transition
* @throws KeeperException if unexpected zookeeper exception
* @throws IOException
*/
- private static int transitionNodeSplit(ZooKeeperWatcher zkw,
- HRegionInfo parent, HRegionInfo a, HRegionInfo b, ServerName serverName,
- final int znodeVersion)
- throws KeeperException, IOException {
- byte [] payload = HRegionInfo.toDelimitedByteArray(a, b);
- return ZKAssign.transitionNode(zkw, parent, serverName,
- EventType.RS_ZK_REGION_SPLITTING, EventType.RS_ZK_REGION_SPLIT,
- znodeVersion, payload);
- }
-
- /**
- *
- * @param zkw zk reference
- * @param parent region to be transitioned to splitting
- * @param serverName server event originates from
- * @param version znode version
- * @return version of node after transition, -1 if unsuccessful transition
- * @throws KeeperException
- * @throws IOException
- */
- int transitionNodeSplitting(final ZooKeeperWatcher zkw, final HRegionInfo parent,
- final ServerName serverName, final int version) throws KeeperException, IOException {
- return ZKAssign.transitionNode(zkw, parent, serverName,
- EventType.RS_ZK_REGION_SPLITTING, EventType.RS_ZK_REGION_SPLITTING, version);
- }
-
- private static int tickleNodeSplit(ZooKeeperWatcher zkw,
+ public static int transitionSplittingNode(ZooKeeperWatcher zkw,
HRegionInfo parent, HRegionInfo a, HRegionInfo b, ServerName serverName,
- final int znodeVersion)
- throws KeeperException, IOException {
+ final int znodeVersion, final EventType beginState,
+ final EventType endState) throws KeeperException, IOException {
byte [] payload = HRegionInfo.toDelimitedByteArray(a, b);
return ZKAssign.transitionNode(zkw, parent, serverName,
- EventType.RS_ZK_REGION_SPLIT, EventType.RS_ZK_REGION_SPLIT,
- znodeVersion, payload);
+ beginState, endState, znodeVersion, payload);
}
}
Modified: hbase/branches/0.96/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMaster.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.96/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMaster.java?rev=1531434&r1=1531433&r2=1531434&view=diff
==============================================================================
--- hbase/branches/0.96/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMaster.java (original)
+++ hbase/branches/0.96/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMaster.java Fri Oct 11 21:44:44 2013
@@ -84,7 +84,8 @@ public class TestMaster {
LOG.info("Splitting table");
TEST_UTIL.getHBaseAdmin().split(TABLENAME.getName());
LOG.info("Waiting for split result to be about to open");
- while (!m.assignmentManager.wasSplitHandlerCalled()) {
+ RegionStates regionStates = m.assignmentManager.getRegionStates();
+ while (regionStates.getRegionsOfTable(TABLENAME).size() <= 1) {
Thread.sleep(100);
}
LOG.info("Making sure we can call getTableRegions while opening");
Modified: hbase/branches/0.96/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.96/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java?rev=1531434&r1=1531433&r2=1531434&view=diff
==============================================================================
--- hbase/branches/0.96/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java (original)
+++ hbase/branches/0.96/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java Fri Oct 11 21:44:44 2013
@@ -37,7 +37,6 @@ import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Abortable;
import org.apache.hadoop.hbase.ClusterStatus;
-import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HColumnDescriptor;
@@ -47,14 +46,17 @@ import org.apache.hadoop.hbase.LargeTest
import org.apache.hadoop.hbase.MiniHBaseCluster;
import org.apache.hadoop.hbase.RegionTransition;
import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.executor.EventType;
+import org.apache.hadoop.hbase.master.RegionState.State;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.HRegionServer;
+import org.apache.hadoop.hbase.regionserver.RegionMergeTransaction;
import org.apache.hadoop.hbase.regionserver.RegionServerStoppedException;
import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.util.FSTableDescriptors;
+import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.util.JVMClusterUtil;
import org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread;
import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
@@ -148,7 +150,7 @@ public class TestMasterFailover {
* </ul>
* @throws Exception
*/
- @Test (timeout=180000)
+ @Test (timeout=240000)
public void testMasterFailoverWithMockedRIT() throws Exception {
final int NUM_MASTERS = 1;
@@ -214,10 +216,30 @@ public class TestMasterFailover {
List<HRegionInfo> disabledRegions = TEST_UTIL.createMultiRegionsInMeta(
TEST_UTIL.getConfiguration(), htdDisabled, SPLIT_KEYS);
+ TableName tableWithMergingRegions = TableName.valueOf("tableWithMergingRegions");
+ TEST_UTIL.createTable(tableWithMergingRegions, FAMILY, new byte [][] {Bytes.toBytes("m")});
+
log("Regions in hbase:meta and namespace have been created");
- // at this point we only expect 3 regions to be assigned out (catalogs and namespace)
- assertEquals(2, cluster.countServedRegions());
+ // at this point we only expect 4 regions to be assigned out
+ // (catalogs and namespace, + 2 merging regions)
+ assertEquals(4, cluster.countServedRegions());
+
+ // Move merging regions to the same region server
+ AssignmentManager am = master.getAssignmentManager();
+ RegionStates regionStates = am.getRegionStates();
+ List<HRegionInfo> mergingRegions = regionStates.getRegionsOfTable(tableWithMergingRegions);
+ assertEquals(2, mergingRegions.size());
+ HRegionInfo a = mergingRegions.get(0);
+ HRegionInfo b = mergingRegions.get(1);
+ HRegionInfo newRegion = RegionMergeTransaction.getMergedRegionInfo(a, b);
+ ServerName mergingServer = regionStates.getRegionServerOfRegion(a);
+ ServerName serverB = regionStates.getRegionServerOfRegion(b);
+ if (!serverB.equals(mergingServer)) {
+ RegionPlan plan = new RegionPlan(b, serverB, mergingServer);
+ am.balance(plan);
+ assertTrue(am.waitForAssignment(b));
+ }
// Let's just assign everything to first RS
HRegionServer hrs = cluster.getRegionServer(0);
@@ -340,6 +362,15 @@ public class TestMasterFailover {
}
/*
+ * ZK = MERGING
+ */
+
+ // Regions of table of merging regions
+ // Cause: Master was down while merging was going on
+ RegionMergeTransaction.createNodeMerging(
+ zkw, newRegion, mergingServer, a, b);
+
+ /*
* ZK = NONE
*/
@@ -356,6 +387,16 @@ public class TestMasterFailover {
cluster.waitForActiveAndReadyMaster();
log("Master is ready");
+ // Get new region states since master restarted
+ regionStates = master.getAssignmentManager().getRegionStates();
+ // Merging region should remain merging
+ assertTrue(regionStates.isRegionInState(a, State.MERGING));
+ assertTrue(regionStates.isRegionInState(b, State.MERGING));
+ assertTrue(regionStates.isRegionInState(newRegion, State.MERGING_NEW));
+ // Now remove the faked merging znode, merging regions should be
+ // offlined automatically, otherwise it is a bug in AM.
+ ZKAssign.deleteNodeFailSilent(zkw, newRegion);
+
// Failover should be completed, now wait for no RIT
log("Waiting for no more RIT");
ZKAssign.blockUntilNoRIT(zkw);
@@ -375,6 +416,9 @@ public class TestMasterFailover {
// Everything that should be offline should not be online
for (HRegionInfo hri : regionsThatShouldBeOffline) {
+ if (onlineRegions.contains(hri)) {
+ LOG.debug(hri);
+ }
assertFalse(onlineRegions.contains(hri));
}
@@ -384,7 +428,6 @@ public class TestMasterFailover {
TEST_UTIL.shutdownMiniCluster();
}
-
/**
* Complex test of master failover that tests as many permutations of the
* different possible states that regions in transition could be in within ZK
@@ -794,7 +837,8 @@ public class TestMasterFailover {
long maxTime = 120000;
boolean done = master.assignmentManager.waitUntilNoRegionsInTransition(maxTime);
if (!done) {
- LOG.info("rit=" + master.getAssignmentManager().getRegionStates().getRegionsInTransition());
+ RegionStates regionStates = master.getAssignmentManager().getRegionStates();
+ LOG.info("rit=" + regionStates.getRegionsInTransition());
}
long elapsed = System.currentTimeMillis() - now;
assertTrue("Elapsed=" + elapsed + ", maxTime=" + maxTime + ", done=" + done,
Modified: hbase/branches/0.96/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.96/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java?rev=1531434&r1=1531433&r2=1531434&view=diff
==============================================================================
--- hbase/branches/0.96/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java (original)
+++ hbase/branches/0.96/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java Fri Oct 11 21:44:44 2013
@@ -35,7 +35,6 @@ import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Abortable;
-import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.HBaseIOException;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HColumnDescriptor;
@@ -48,6 +47,7 @@ import org.apache.hadoop.hbase.MiniHBase
import org.apache.hadoop.hbase.RegionTransition;
import org.apache.hadoop.hbase.Server;
import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.UnknownRegionException;
import org.apache.hadoop.hbase.ZooKeeperConnectionException;
import org.apache.hadoop.hbase.catalog.MetaReader;
@@ -65,11 +65,11 @@ import org.apache.hadoop.hbase.master.HM
import org.apache.hadoop.hbase.master.RegionState;
import org.apache.hadoop.hbase.master.RegionState.State;
import org.apache.hadoop.hbase.master.RegionStates;
-import org.apache.hadoop.hbase.master.handler.SplitRegionHandler;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
+import org.apache.hadoop.hbase.util.PairOfSameType;
import org.apache.hadoop.hbase.util.Threads;
import org.apache.hadoop.hbase.zookeeper.ZKAssign;
import org.apache.hadoop.hbase.zookeeper.ZKUtil;
@@ -278,7 +278,7 @@ public class TestSplitTransactionOnClust
int regionCount = ProtobufUtil.getOnlineRegions(server).size();
// Now, before we split, set special flag in master, a flag that has
// it FAIL the processing of split.
- SplitRegionHandler.TEST_SKIP = true;
+ AssignmentManager.TEST_SKIP_SPLIT_HANDLING = true;
// Now try splitting and it should work.
split(hri, server, regionCount);
// Get daughters
@@ -286,15 +286,18 @@ public class TestSplitTransactionOnClust
// Assert the ephemeral node is up in zk.
String path = ZKAssign.getNodeName(TESTING_UTIL.getZooKeeperWatcher(),
hri.getEncodedName());
- Stat stats =
- TESTING_UTIL.getZooKeeperWatcher().getRecoverableZooKeeper().exists(path, false);
- LOG.info("EPHEMERAL NODE BEFORE SERVER ABORT, path=" + path + ", stats=" + stats);
- RegionTransition rt =
- RegionTransition.parseFrom(ZKAssign.getData(TESTING_UTIL.getZooKeeperWatcher(),
+ RegionTransition rt = null;
+ Stat stats = null;
+ // Wait until the znode has moved to SPLIT
+ for (int i=0; i<100; i++) {
+ stats = TESTING_UTIL.getZooKeeperWatcher().getRecoverableZooKeeper().exists(path, false);
+ rt = RegionTransition.parseFrom(ZKAssign.getData(TESTING_UTIL.getZooKeeperWatcher(),
hri.getEncodedName()));
- // State could be SPLIT or SPLITTING.
- assertTrue(rt.getEventType().equals(EventType.RS_ZK_REGION_SPLIT) ||
- rt.getEventType().equals(EventType.RS_ZK_REGION_SPLITTING));
+ if (rt.getEventType().equals(EventType.RS_ZK_REGION_SPLIT)) break;
+ Thread.sleep(100);
+ }
+ LOG.info("EPHEMERAL NODE BEFORE SERVER ABORT, path=" + path + ", stats=" + stats);
+ assertTrue(rt != null && rt.getEventType().equals(EventType.RS_ZK_REGION_SPLIT));
// Now crash the server
cluster.abortRegionServer(tableRegionIndex);
waitUntilRegionServerDead();
@@ -316,7 +319,7 @@ public class TestSplitTransactionOnClust
assertTrue(stats == null);
} finally {
// Set this flag back.
- SplitRegionHandler.TEST_SKIP = false;
+ AssignmentManager.TEST_SKIP_SPLIT_HANDLING = false;
admin.setBalancerRunning(true, false);
cluster.getMaster().setCatalogJanitorEnabled(true);
t.close();
@@ -576,7 +579,7 @@ public class TestSplitTransactionOnClust
printOutRegions(server, "Initial regions: ");
// Now, before we split, set special flag in master, a flag that has
// it FAIL the processing of split.
- SplitRegionHandler.TEST_SKIP = true;
+ AssignmentManager.TEST_SKIP_SPLIT_HANDLING = true;
// Now try splitting and it should work.
this.admin.split(hri.getRegionNameAsString());
@@ -606,7 +609,7 @@ public class TestSplitTransactionOnClust
assertTrue(regionServerOfRegion != null);
// Remove the block so that split can move ahead.
- SplitRegionHandler.TEST_SKIP = false;
+ AssignmentManager.TEST_SKIP_SPLIT_HANDLING = false;
String node = ZKAssign.getNodeName(zkw, hri.getEncodedName());
Stat stat = new Stat();
byte[] data = ZKUtil.getDataNoWatch(zkw, node, stat);
@@ -623,7 +626,7 @@ public class TestSplitTransactionOnClust
assertTrue(regionServerOfRegion == null);
} finally {
// Set this flag back.
- SplitRegionHandler.TEST_SKIP = false;
+ AssignmentManager.TEST_SKIP_SPLIT_HANDLING = false;
admin.setBalancerRunning(true, false);
cluster.getMaster().setCatalogJanitorEnabled(true);
t.close();
@@ -696,8 +699,6 @@ public class TestSplitTransactionOnClust
ServerName regionServerOfRegion = regionStates.getRegionServerOfRegion(hri);
assertTrue(regionServerOfRegion == null);
} finally {
- // Set this flag back.
- SplitRegionHandler.TEST_SKIP = false;
this.admin.setBalancerRunning(true, false);
cluster.getMaster().setCatalogJanitorEnabled(true);
t.close();
@@ -851,8 +852,8 @@ public class TestSplitTransactionOnClust
assertTrue("not able to find a splittable region", region != null);
SplitTransaction st = new MockedSplitTransaction(region, Bytes.toBytes("row2")) {
@Override
- int createNodeSplitting(ZooKeeperWatcher zkw, HRegionInfo region,
- ServerName serverName) throws KeeperException, IOException {
+ PairOfSameType<HRegion> createDaughters(final Server server,
+ final RegionServerServices services) throws IOException {
throw new SplittingNodeCreationFailedException ();
}
};