You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by ap...@apache.org on 2013/07/16 02:38:22 UTC
svn commit: r1503530 - in /hbase/branches/0.94/src:
main/java/org/apache/hadoop/hbase/util/
test/java/org/apache/hadoop/hbase/mttr/
test/java/org/apache/hadoop/hbase/util/
Author: apurtell
Date: Tue Jul 16 00:38:22 2013
New Revision: 1503530
URL: http://svn.apache.org/r1503530
Log:
HBASE-8908. Backport HBASE-8882+HBASE-8904+HBASE-8924 (An Integration Test to Test MTTR) to 0.94
Added:
hbase/branches/0.94/src/test/java/org/apache/hadoop/hbase/mttr/
hbase/branches/0.94/src/test/java/org/apache/hadoop/hbase/mttr/IntegrationTestMTTR.java
Modified:
hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/util/AbstractHBaseTool.java
hbase/branches/0.94/src/test/java/org/apache/hadoop/hbase/util/ChaosMonkey.java
Modified: hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/util/AbstractHBaseTool.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/util/AbstractHBaseTool.java?rev=1503530&r1=1503529&r2=1503530&view=diff
==============================================================================
--- hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/util/AbstractHBaseTool.java (original)
+++ hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/util/AbstractHBaseTool.java Tue Jul 16 00:38:22 2013
@@ -16,6 +16,7 @@
*/
package org.apache.hadoop.hbase.util;
+import java.io.IOException;
import java.util.Set;
import java.util.TreeSet;
@@ -76,7 +77,7 @@ public abstract class AbstractHBaseTool
}
@Override
- public final int run(String[] args) throws Exception {
+ public final int run(String[] args) throws IOException {
if (conf == null) {
LOG.error("Tool configuration is not initialized");
throw new NullPointerException("conf");
Added: hbase/branches/0.94/src/test/java/org/apache/hadoop/hbase/mttr/IntegrationTestMTTR.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.94/src/test/java/org/apache/hadoop/hbase/mttr/IntegrationTestMTTR.java?rev=1503530&view=auto
==============================================================================
--- hbase/branches/0.94/src/test/java/org/apache/hadoop/hbase/mttr/IntegrationTestMTTR.java (added)
+++ hbase/branches/0.94/src/test/java/org/apache/hadoop/hbase/mttr/IntegrationTestMTTR.java Tue Jul 16 00:38:22 2013
@@ -0,0 +1,518 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.mttr;
+
+import com.google.common.base.Objects;
+import org.apache.commons.lang.RandomStringUtils;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.commons.math.stat.descriptive.DescriptiveStatistics;
+import org.apache.hadoop.hbase.ClusterStatus;
+import org.apache.hadoop.hbase.HColumnDescriptor;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.IntegrationTestingUtility;
+import org.apache.hadoop.hbase.IntegrationTests;
+import org.apache.hadoop.hbase.client.HBaseAdmin;
+import org.apache.hadoop.hbase.client.HTable;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.ResultScanner;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.filter.KeyOnlyFilter;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.ChaosMonkey;
+import org.apache.hadoop.hbase.util.LoadTestTool;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.concurrent.Callable;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+import java.util.concurrent.TimeUnit;
+
+import static junit.framework.Assert.assertEquals;
+
+/**
+ * Integration test that should benchmark how fast HBase can recover from failures. This test starts
+ * different threads:
+ * <ol>
+ * <li>
+ * Load Test Tool.<br/>
+ * This runs so that all RegionServers will have some load and HLogs will be full.
+ * </li>
+ * <li>
+ * Scan thread.<br/>
+ * This thread runs a very short scan over and over again recording how long it takes to respond.
+ * The longest response is assumed to be the time it took to recover.
+ * </li>
+ * <li>
+ * Put thread.<br/>
+ * This thread is just like the scan thread except that it does a very small put.
+ * </li>
+ * <li>
+ * Admin thread. <br/>
+ * This thread will continually go to the master to try and get the cluster status. Just like the
+ * put and scan threads, the time to respond is recorded.
+ * </li>
+ * <li>
+ * Chaos Monkey thread.<br/>
+ * This thread runs a ChaosMonkey.Action.
+ * </li>
+ * </ol>
+ * <p/>
+ * The ChaosMonkey actions currently run are:
+ * <ul>
+ * <li>Restart the RegionServer holding meta.</li>
+ * <li>Restart the RegionServer holding the table the scan and put threads are targeting.</li>
+ * <li>Move the Regions of the table used by the scan and put threads.</li>
+ * <li>Restart the master.</li>
+ * </ul>
+ * <p/>
+ * At the end of the test a log line is output on the INFO level containing the timing data that was
+ * collected.
+ */
+
+@Category(IntegrationTests.class)
+public class IntegrationTestMTTR {
+  /**
+   * Constants.
+   */
+  // Column family of the table that the put and scan threads target.
+  private static final byte[] FAMILY = Bytes.toBytes("d");
+  private static final Log LOG = LogFactory.getLog(IntegrationTestMTTR.class);
+  // Sleep time handed to the region server restart and region move actions.
+  // Upper-case 'L' suffix: a lower-case 'l' is easily misread as the digit one.
+  private static final long SLEEP_TIME = 60 * 1000L;
+
+ /**
+ * Configurable table names. They are read from the configuration in setupTables();
+ * the byte[] variants are the Bytes.toBytes() encodings of the corresponding strings.
+ */
+ private static String tableName;
+ private static byte[] tableNameBytes;
+ private static String loadTableName;
+ private static byte[] loadTableNameBytes;
+
+ /**
+ * Util to get at the cluster. Created in setUp() and cleared in after().
+ */
+ private static IntegrationTestingUtility util;
+
+ /**
+ * Executor for test threads (chaos monkey action, put, scan, admin and load callables).
+ */
+ private static ExecutorService executorService;
+
+ /**
+ * All of the chaos monkey actions used. They are created and initialized in setupActions().
+ */
+ private static ChaosMonkey.Action restartRSAction;
+ private static ChaosMonkey.Action restartMetaAction;
+ private static ChaosMonkey.Action moveRegionAction;
+ private static ChaosMonkey.Action restartMasterAction;
+
+ /**
+ * The load test tool used to create load and make sure that HLogs aren't empty.
+ */
+ private static LoadTestTool loadTool;
+
+
+ /**
+ * One-time fixture: initializes the cluster, the load test tool, the executor, the tables and
+ * the chaos monkey actions. The order matters: setupTables() uses loadTool, and setupActions()
+ * uses the table name resolved by setupTables().
+ *
+ * @throws Exception if any of the initialization steps fails
+ */
+ @BeforeClass
+ public static void setUp() throws Exception {
+ // Set up the integration test util
+ if (util == null) {
+ util = new IntegrationTestingUtility();
+ }
+
+ // Make sure there are three servers.
+ util.initializeCluster(3);
+
+ // Set up the load test tool.
+ loadTool = new LoadTestTool();
+ loadTool.setConf(util.getConfiguration());
+
+ // Create executor with enough threads to restart rs's,
+ // run scans, puts, admin ops and load test tool.
+ executorService = Executors.newFixedThreadPool(8);
+
+ // Set up the tables needed.
+ setupTables();
+
+ // Set up the actions.
+ setupActions();
+ }
+
+ /**
+ * Creates the four chaos monkey actions used by the tests and initializes each of them with
+ * an action context wrapping the shared testing utility. Reads tableName, so it has to run
+ * after setupTables().
+ *
+ * @throws IOException declared because the actions' init method is declared to throw it
+ */
+ private static void setupActions() throws IOException {
+ // Set up the action that will restart a region server holding a region from our table
+ // because this table should only have one region we should be good.
+ restartRSAction = new ChaosMonkey.RestartRsHoldingTable(SLEEP_TIME, tableName);
+
+ // Set up the action that will restart the region server holding meta.
+ restartMetaAction = new ChaosMonkey.RestartRsHoldingMeta(SLEEP_TIME);
+
+ // Set up the action that will move the regions of our table.
+ moveRegionAction = new ChaosMonkey.MoveRegionsOfTable(SLEEP_TIME, tableName);
+
+ // Restart the active master; note that this action is given 1000 rather than SLEEP_TIME.
+ restartMasterAction = new ChaosMonkey.RestartActiveMaster(1000);
+
+ // Give the action the access to the cluster.
+ ChaosMonkey.ActionContext actionContext = new ChaosMonkey.ActionContext(util);
+ restartRSAction.init(actionContext);
+ restartMetaAction.init(actionContext);
+ moveRegionAction.init(actionContext);
+ restartMasterAction.init(actionContext);
+ }
+
+  /**
+   * Resolves the configurable table names, drops any tables left over from a previous run,
+   * creates the single-family table used by the put and scan threads, and asks LoadTestTool
+   * to initialize its own table.
+   *
+   * @throws IOException if a table cannot be checked, deleted or created, or if the
+   *         LoadTestTool run throws
+   */
+  private static void setupTables() throws IOException {
+    // Get the table names.
+    tableName = util.getConfiguration()
+        .get("hbase.IntegrationTestMTTR.tableName", "IntegrationTestMTTR");
+    tableNameBytes = Bytes.toBytes(tableName);
+
+    loadTableName = util.getConfiguration()
+        .get("hbase.IntegrationTestMTTR.loadTableName", "IntegrationTestMTTRLoadTestTool");
+    loadTableNameBytes = Bytes.toBytes(loadTableName);
+
+    // Start from a clean slate; both tables are handled the same way.
+    deleteTableIfExists(tableNameBytes);
+    deleteTableIfExists(loadTableNameBytes);
+
+    // Create the table. If this fails then fail everything.
+    HTableDescriptor tableDescriptor = new HTableDescriptor(tableNameBytes);
+
+    // Make the max file size huge so that splits don't happen during the test.
+    tableDescriptor.setMaxFileSize(Long.MAX_VALUE);
+
+    HColumnDescriptor descriptor = new HColumnDescriptor(FAMILY);
+    descriptor.setMaxVersions(1);
+    tableDescriptor.addFamily(descriptor);
+    util.getHBaseAdmin().createTable(tableDescriptor);
+
+    // Setup the table for LoadTestTool
+    int ret = loadTool.run(new String[]{"-tn", loadTableName, "-init_only"});
+    assertEquals("Failed to initialize LoadTestTool", 0, ret);
+  }
+
+  /** Deletes the named table when it is present; does nothing otherwise. */
+  private static void deleteTableIfExists(byte[] name) throws IOException {
+    if (util.getHBaseAdmin().tableExists(name)) {
+      util.deleteTable(name);
+    }
+  }
+
+  /**
+   * Tears down everything created in {@link #setUp()}.
+   *
+   * The cluster is restored first. The executor is then shut down and waited for, even when
+   * restoring the cluster fails, so that no worker threads are left behind. Every step tolerates
+   * a fixture that was only partially set up.
+   *
+   * @throws IOException if restoring the cluster fails
+   */
+  @AfterClass
+  public static void after() throws IOException {
+    try {
+      // Clean everything up.
+      if (util != null) {
+        util.restoreCluster();
+      }
+    } finally {
+      util = null;
+
+      // Stop the threads and wait, so that we know everything is complete.
+      if (executorService != null) {
+        executorService.shutdown();
+        try {
+          if (!executorService.awaitTermination(60, TimeUnit.SECONDS)) {
+            LOG.warn("Test threads did not finish in time; interrupting them.");
+            executorService.shutdownNow();
+          }
+        } catch (InterruptedException e) {
+          executorService.shutdownNow();
+          // Preserve the interrupt for whoever is running the test.
+          Thread.currentThread().interrupt();
+        }
+        executorService = null;
+      }
+
+      // Clean up the actions.
+      moveRegionAction = null;
+      restartMetaAction = null;
+      restartRSAction = null;
+      restartMasterAction = null;
+
+      loadTool = null;
+    }
+  }
+
+  /** Collects timings while a region server holding the test table is restarted. */
+  @Test
+  public void testRestartRsHoldingTable() throws Exception {
+    Callable<Boolean> monkey = new ActionCallable(restartRSAction);
+    run(monkey, "RestartRsHoldingTable");
+  }
+
+  /** Collects timings while the region server holding meta is restarted. */
+  @Test
+  public void testKillRsHoldingMeta() throws Exception {
+    Callable<Boolean> monkey = new ActionCallable(restartMetaAction);
+    run(monkey, "KillRsHoldingMeta");
+  }
+
+  /** Collects timings while the regions of the test table are moved between servers. */
+  @Test
+  public void testMoveRegion() throws Exception {
+    Callable<Boolean> monkey = new ActionCallable(moveRegionAction);
+    run(monkey, "MoveRegion");
+  }
+
+  /** Collects timings while the active master is restarted. */
+  @Test
+  public void testRestartMaster() throws Exception {
+    Callable<Boolean> monkey = new ActionCallable(restartMasterAction);
+    run(monkey, "RestartMaster");
+  }
+
+  /**
+   * Runs the given chaos monkey callable repeatedly (10 times on a distributed cluster,
+   * 3 times otherwise). Each round runs put, scan, admin and load callables alongside it,
+   * and the collected timings are logged at INFO level at the end.
+   *
+   * @param monkeyCallable the callable performing the disruptive action
+   * @param testName name reported in the result log line
+   * @throws Exception if any of the submitted callables fails or the thread is interrupted
+   */
+  public void run(Callable<Boolean> monkeyCallable, String testName) throws Exception {
+    final int maxIters = util.getHBaseClusterInterface().isDistributedCluster() ? 10 : 3;
+
+    // One timing result per round and per kind of operation.
+    ArrayList<TimingResult> resultPuts = new ArrayList<TimingResult>(maxIters);
+    ArrayList<TimingResult> resultScan = new ArrayList<TimingResult>(maxIters);
+    ArrayList<TimingResult> resultAdmin = new ArrayList<TimingResult>(maxIters);
+    final long start = System.nanoTime();
+
+    int round = 0;
+    while (round < maxIters) {
+      // Kick off the disruptive action first ...
+      Future<Boolean> monkeyFuture = executorService.submit(monkeyCallable);
+
+      // ... then the callables that watch it, in the same order as before.
+      Future<TimingResult> putFuture = executorService.submit(new PutCallable(monkeyFuture));
+      Future<TimingResult> scanFuture = executorService.submit(new ScanCallable(monkeyFuture));
+      Future<TimingResult> adminFuture = executorService.submit(new AdminCallable(monkeyFuture));
+      Future<Boolean> loadFuture = executorService.submit(new LoadCallable(monkeyFuture));
+
+      // Wait for the action and the load generator before reading the timings.
+      monkeyFuture.get();
+      loadFuture.get();
+
+      // Store the times to display later.
+      resultPuts.add(putFuture.get());
+      resultScan.add(scanFuture.get());
+      resultAdmin.add(adminFuture.get());
+
+      // Wait some time for everything to settle down.
+      Thread.sleep(5000L);
+      round++;
+    }
+
+    long runtimeMs = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - start);
+
+    // Log the info
+    LOG.info(Objects.toStringHelper("MTTRResults")
+        .add("putResults", resultPuts)
+        .add("scanResults", resultScan)
+        .add("adminResults", resultAdmin)
+        .add("totalRuntimeMs", runtimeMs)
+        .add("name", testName)
+        .toString());
+  }
+
+  /**
+   * Class to store results of TimingCallable.
+   *
+   * Every recorded time is kept, in milliseconds, in a DescriptiveStatistics instance;
+   * toString() reports the count, min/mean/max and a set of percentiles.
+   */
+  private static class TimingResult {
+    private final DescriptiveStatistics stats = new DescriptiveStatistics();
+
+    /**
+     * Add a result to this aggregate result.
+     * @param time Time in nanoseconds; it is converted to milliseconds before being stored
+     */
+    public void addResult(long time) {
+      stats.addValue(TimeUnit.MILLISECONDS.convert(time, TimeUnit.NANOSECONDS));
+    }
+
+    @Override
+    public String toString() {
+      Objects.ToStringHelper helper = Objects.toStringHelper(this)
+          .add("numResults", stats.getN())
+          .add("minTime", stats.getMin())
+          .add("meanTime", stats.getMean())
+          .add("maxTime", stats.getMax())
+          .add("25th", stats.getPercentile(25))
+          .add("50th", stats.getPercentile(50))
+          .add("75th", stats.getPercentile(75))
+          .add("90th", stats.getPercentile(90))
+          .add("95th", stats.getPercentile(95))
+          .add("99th", stats.getPercentile(99))
+          .add("99.9th", stats.getPercentile(99.9))
+          .add("99.99th", stats.getPercentile(99.99));
+      return helper.toString();
+    }
+  }
+
+  /**
+   * Base class for actions that need to record the time needed to recover from a failure.
+   *
+   * The action is repeated, timing every attempt (failed ones included), until it has succeeded
+   * ten times after the supplied future completed. A failure resets that count.
+   */
+  public abstract class TimingCallable implements Callable<TimingResult> {
+    protected final Future<?> future;
+
+    public TimingCallable(Future<?> f) {
+      future = f;
+    }
+
+    /**
+     * @return the timings of all attempts made
+     * @throws Exception only when the thread is interrupted; other failures of the action are
+     *         expected while the cluster recovers and simply restart the success count
+     */
+    @Override
+    public TimingResult call() throws Exception {
+      TimingResult result = new TimingResult();
+      int numAfterDone = 0;
+      // Keep trying until the disruption is over and the action has gone through repeatedly.
+      while (numAfterDone < 10) {
+        long start = System.nanoTime();
+        try {
+          boolean actionResult = doAction();
+          if (actionResult && future.isDone()) {
+            numAfterDone++;
+          }
+        } catch (InterruptedException e) {
+          // Do not retry forever once asked to stop; let the executor cancel this task.
+          Thread.currentThread().interrupt();
+          throw e;
+        } catch (Exception e) {
+          numAfterDone = 0;
+        }
+        result.addResult(System.nanoTime() - start);
+      }
+      return result;
+    }
+
+    /**
+     * Performs one attempt of the timed operation.
+     * @return true if the attempt counts as a success
+     */
+    protected abstract boolean doAction() throws Exception;
+
+    protected String getSpanName() {
+      return this.getClass().getSimpleName();
+    }
+  }
+
+  /**
+   * Callable that will keep putting small amounts of data into a table
+   * until the future supplied returns. The time of every attempt is recorded.
+   * The table handle opened by the constructor is closed when {@link #call()} finishes.
+   */
+  public class PutCallable extends TimingCallable {
+
+    private final HTable table;
+
+    public PutCallable(Future<?> f) throws IOException {
+      super(f);
+      this.table = new HTable(util.getConfiguration(), tableNameBytes);
+    }
+
+    @Override
+    public TimingResult call() throws Exception {
+      try {
+        return super.call();
+      } finally {
+        // Each instance is submitted exactly once, so the table can be released here.
+        table.close();
+      }
+    }
+
+    @Override
+    protected boolean doAction() throws Exception {
+      Put p = new Put(Bytes.toBytes(RandomStringUtils.randomAlphanumeric(5)));
+      p.add(FAMILY, Bytes.toBytes("\0"), Bytes.toBytes(RandomStringUtils.randomAscii(5)));
+      table.put(p);
+      table.flushCommits();
+      return true;
+    }
+
+    @Override
+    protected String getSpanName() {
+      return "MTTR Put Test";
+    }
+  }
+
+  /**
+   * Callable that will keep scanning for small amounts of data until the
+   * supplied future returns. The time of every attempt is recorded.
+   * The table handle opened by the constructor is closed when {@link #call()} finishes.
+   */
+  public class ScanCallable extends TimingCallable {
+    private final HTable table;
+
+    public ScanCallable(Future<?> f) throws IOException {
+      super(f);
+      this.table = new HTable(util.getConfiguration(), tableNameBytes);
+    }
+
+    @Override
+    public TimingResult call() throws Exception {
+      try {
+        return super.call();
+      } finally {
+        // Each instance is submitted exactly once, so the table can be released here.
+        table.close();
+      }
+    }
+
+    /**
+     * Opens a scanner and fetches the first row.
+     * @return true if a non-empty row came back
+     */
+    @Override
+    protected boolean doAction() throws Exception {
+      ResultScanner rs = null;
+      try {
+        Scan s = new Scan();
+        s.setBatch(2);
+        s.addFamily(FAMILY);
+        s.setFilter(new KeyOnlyFilter());
+        s.setMaxVersions(1);
+
+        rs = table.getScanner(s);
+        Result result = rs.next();
+        // rs was just dereferenced, so only the row itself needs checking.
+        return result != null && result.size() > 0;
+      } finally {
+        if (rs != null) {
+          rs.close();
+        }
+      }
+    }
+
+    @Override
+    protected String getSpanName() {
+      return "MTTR Scan Test";
+    }
+  }
+
+  /**
+   * Callable that will keep going to the master for cluster status. The time of every attempt
+   * is recorded.
+   */
+  public class AdminCallable extends TimingCallable {
+
+    public AdminCallable(Future<?> f) throws IOException {
+      super(f);
+    }
+
+    /**
+     * Asks the master for the cluster status through a fresh HBaseAdmin.
+     * @return true if a status was returned
+     */
+    @Override
+    protected boolean doAction() throws Exception {
+      HBaseAdmin admin = new HBaseAdmin(util.getConfiguration());
+      try {
+        ClusterStatus status = admin.getClusterStatus();
+        return status != null;
+      } finally {
+        // This runs in a tight loop; release the admin created for this attempt.
+        admin.close();
+      }
+    }
+
+    @Override
+    protected String getSpanName() {
+      return "MTTR Admin Test";
+    }
+  }
+
+
+  /**
+   * Adapter that lets a ChaosMonkey action be submitted to an executor.
+   */
+  public class ActionCallable implements Callable<Boolean> {
+    private final ChaosMonkey.Action action;
+
+    public ActionCallable(ChaosMonkey.Action action) {
+      this.action = action;
+    }
+
+    /**
+     * Performs the wrapped action once.
+     * @return always true; failures surface as exceptions
+     */
+    @Override
+    public Boolean call() throws Exception {
+      action.perform();
+      return Boolean.TRUE;
+    }
+  }
+
+  /**
+   * Callable used to make sure the cluster has some load on it.
+   * This callable uses LoadTestTool to write to the load table, over and over, until the
+   * supplied future is done. At least one write pass is always made.
+   */
+  public class LoadCallable implements Callable<Boolean> {
+
+    private final Future<?> future;
+
+    public LoadCallable(Future<?> f) {
+      future = f;
+    }
+
+    /**
+     * @return always true; a failed load pass fails the "Load failed" assertion instead
+     */
+    @Override
+    public Boolean call() throws Exception {
+      int colsPerKey = 10;
+      int recordSize = 500;
+      int numServers = util.getHBaseClusterInterface().getInitialClusterStatus().getServersSize();
+      int numKeys = numServers * 5000;
+      int writeThreads = 10;
+
+      // These arguments do not change between passes, so format them once.
+      String writeSpec = String.format("%d:%d:%d", colsPerKey, recordSize, writeThreads);
+      String numKeysArg = String.valueOf(numKeys);
+
+      // Loop until the chaos monkey future is done.
+      // But always go in just in case some action completes quickly
+      do {
+        int ret = loadTool.run(new String[]{
+            "-tn", loadTableName,
+            "-write", writeSpec,
+            "-num_keys", numKeysArg,
+            "-skip_init"
+        });
+        assertEquals("Load failed", 0, ret);
+      } while (!future.isDone());
+
+      return true;
+    }
+  }
+}
Modified: hbase/branches/0.94/src/test/java/org/apache/hadoop/hbase/util/ChaosMonkey.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.94/src/test/java/org/apache/hadoop/hbase/util/ChaosMonkey.java?rev=1503530&r1=1503529&r2=1503530&view=diff
==============================================================================
--- hbase/branches/0.94/src/test/java/org/apache/hadoop/hbase/util/ChaosMonkey.java (original)
+++ hbase/branches/0.94/src/test/java/org/apache/hadoop/hbase/util/ChaosMonkey.java Tue Jul 16 00:38:22 2013
@@ -22,13 +22,11 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
-import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Queue;
import java.util.Random;
-import java.util.Set;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.logging.Log;
@@ -44,12 +42,12 @@ import org.apache.hadoop.hbase.Integrati
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.Stoppable;
import org.apache.hadoop.hbase.client.HBaseAdmin;
+import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.util.ToolRunner;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
-import com.google.protobuf.ServiceException;
/**
* A utility to injects faults in a running cluster.
@@ -113,10 +111,10 @@ public class ChaosMonkey extends Abstrac
/**
* Context for Action's
*/
- private static class ActionContext {
+ public static class ActionContext {
private IntegrationTestingUtility util;
- ActionContext(IntegrationTestingUtility util) {
+ public ActionContext(IntegrationTestingUtility util) {
this.util = util;
}
@@ -143,7 +141,7 @@ public class ChaosMonkey extends Abstrac
protected ClusterStatus initialStatus;
protected ServerName[] initialServers;
- void init(ActionContext context) throws Exception {
+ public void init(ActionContext context) throws IOException {
this.context = context;
cluster = context.getHBaseCluster();
initialStatus = cluster.getInitialClusterStatus();
@@ -151,7 +149,7 @@ public class ChaosMonkey extends Abstrac
initialServers = regionServers.toArray(new ServerName[regionServers.size()]);
}
- void perform() throws Exception { };
+ public void perform() throws Exception { };
// TODO: perhaps these methods should be elsewhere?
/** Returns current region servers */
@@ -204,12 +202,14 @@ public class ChaosMonkey extends Abstrac
}
void restartMaster(ServerName server, long sleepTime) throws IOException {
+ sleepTime = Math.max(sleepTime, 1000);
killMaster(server);
sleep(sleepTime);
startMaster(server);
}
void restartRs(ServerName server, long sleepTime) throws IOException {
+ sleepTime = Math.max(sleepTime, 1000);
killRs(server);
sleep(sleepTime);
startRs(server);
@@ -221,7 +221,7 @@ public class ChaosMonkey extends Abstrac
super(sleepTime);
}
@Override
- void perform() throws Exception {
+ public void perform() throws Exception {
LOG.info("Performing action: Restart active master");
ServerName master = cluster.getClusterStatus().getMaster();
@@ -235,7 +235,7 @@ public class ChaosMonkey extends Abstrac
}
@Override
- void perform() throws Exception {
+ public void perform() throws Exception {
LOG.info("Performing action: Restart random region server");
ServerName server = selectRandomItem(getCurrentServers());
@@ -243,12 +243,12 @@ public class ChaosMonkey extends Abstrac
}
}
- public static class RestartRsHoldingMeta extends RestartRandomRs {
+ public static class RestartRsHoldingMeta extends RestartActionBase {
public RestartRsHoldingMeta(long sleepTime) {
super(sleepTime);
}
@Override
- void perform() throws Exception {
+ public void perform() throws Exception {
LOG.info("Performing action: Restart region server holding META");
ServerName server = cluster.getServerHoldingMeta();
if (server == null) {
@@ -264,7 +264,7 @@ public class ChaosMonkey extends Abstrac
super(sleepTime);
}
@Override
- void perform() throws Exception {
+ public void perform() throws Exception {
LOG.info("Performing action: Restart region server holding ROOT");
ServerName server = cluster.getServerHoldingMeta();
if (server == null) {
@@ -275,6 +275,70 @@ public class ChaosMonkey extends Abstrac
}
}
+  /**
+   * Restarts one randomly chosen region server out of those reported as region locations of
+   * the given table.
+   */
+  public static class RestartRsHoldingTable extends RestartActionBase {
+
+    private final String tableName;
+
+    public RestartRsHoldingTable(long sleepTime, String tableName) {
+      super(sleepTime);
+      this.tableName = tableName;
+    }
+
+    /**
+     * Looks up the table's region locations and restarts one of the servers. Does nothing
+     * (beyond a debug log line) when the locations cannot be read or none are reported.
+     */
+    @Override
+    public void perform() throws Exception {
+      HTable table = null;
+      Collection<ServerName> serverNames;
+      try {
+        Configuration conf = context.getHaseIntegrationTestingUtility().getConfiguration();
+        table = new HTable(conf, tableName);
+        serverNames = table.getRegionLocations().values();
+      } catch (IOException e) {
+        LOG.debug("Error creating HTable used to get list of region locations.", e);
+        return;
+      } finally {
+        if (table != null) {
+          table.close();
+        }
+      }
+      // Random.nextInt(0) throws IllegalArgumentException, so bail out when there is no candidate.
+      if (serverNames.isEmpty()) {
+        LOG.debug("No region locations found for table " + tableName + "; nothing to restart.");
+        return;
+      }
+      Random random = new Random();
+      ServerName[] nameArray = serverNames.toArray(new ServerName[serverNames.size()]);
+      restartRs(nameArray[random.nextInt(nameArray.length)], sleepTime);
+    }
+  }
+
+  /**
+   * Moves every region of the given table to a randomly chosen region server and then sleeps
+   * for the configured time. Failures are logged at debug level and never propagated.
+   */
+  public static class MoveRegionsOfTable extends Action {
+    private final long sleepTime;
+    private final byte[] tableNameBytes;
+
+    public MoveRegionsOfTable(long sleepTime, String tableName) {
+      this.sleepTime = sleepTime;
+      this.tableNameBytes = Bytes.toBytes(tableName);
+    }
+
+    @Override
+    public void perform() throws Exception {
+      try {
+        HBaseAdmin admin = this.context.getHaseIntegrationTestingUtility().getHBaseAdmin();
+        List<HRegionInfo> regions = admin.getTableRegions(tableNameBytes);
+        Collection<ServerName> serversList = admin.getClusterStatus().getServers();
+        ServerName[] servers = serversList.toArray(new ServerName[serversList.size()]);
+        if (servers.length == 0) {
+          // Without this check every region would fail on Random.nextInt(0) and be logged.
+          LOG.debug("No region servers reported; not moving any regions.");
+        } else {
+          Random random = new Random();
+          for (HRegionInfo regionInfo : regions) {
+            try {
+              byte[] destServerName =
+                  Bytes.toBytes(servers[random.nextInt(servers.length)].getServerName());
+              admin.move(regionInfo.getRegionName(), destServerName);
+            } catch (Exception e) {
+              // One failed move should not prevent the remaining regions from being moved.
+              LOG.debug("Error moving region", e);
+            }
+          }
+        }
+        Thread.sleep(sleepTime);
+      } catch (InterruptedException e) {
+        // Keep the interrupt visible to the caller instead of silently dropping it.
+        Thread.currentThread().interrupt();
+        LOG.debug("Error performing MoveRegionsOfTable", e);
+      } catch (Exception e) {
+        LOG.debug("Error performing MoveRegionsOfTable", e);
+      }
+    }
+  }
+
/**
* Restarts a ratio of the running regionservers at the same time
*/
@@ -287,7 +351,7 @@ public class ChaosMonkey extends Abstrac
}
@Override
- void perform() throws Exception {
+ public void perform() throws Exception {
LOG.info(String.format("Performing action: Batch restarting %d%% of region servers",
(int)(ratio * 100)));
List<ServerName> selectedServers = selectRandomItems(getCurrentServers(), ratio);
@@ -329,10 +393,10 @@ public class ChaosMonkey extends Abstrac
}
@Override
- void perform() throws Exception {
+ public void perform() throws Exception {
+ Random random = new Random();
LOG.info(String.format("Performing action: Rolling batch restarting %d%% of region servers",
(int)(ratio * 100)));
- Random random = new Random();
List<ServerName> selectedServers = selectRandomItems(getCurrentServers(), ratio);
Queue<ServerName> serversToBeKilled = new LinkedList<ServerName>(selectedServers);
@@ -379,7 +443,7 @@ public class ChaosMonkey extends Abstrac
}
@Override
- void perform() throws Exception {
+ public void perform() throws Exception {
LOG.info("Unbalancing regions");
ClusterStatus status = this.cluster.getClusterStatus();
List<ServerName> victimServers = new LinkedList<ServerName>(status.getServers());
@@ -417,7 +481,7 @@ public class ChaosMonkey extends Abstrac
public static class ForceBalancerAction extends Action {
@Override
- void perform() throws Exception {
+ public void perform() throws Exception {
LOG.info("Balancing regions");
HBaseAdmin admin = this.context.getHaseIntegrationTestingUtility().getHBaseAdmin();
boolean result = admin.balancer();
@@ -514,7 +578,6 @@ public class ChaosMonkey extends Abstrac
}
}
-
/** A policy which performs a sequence of actions deterministically. */
public static class DoActionsOncePolicy extends PeriodicPolicy {
private List<Action> actions;