Posted to commits@hbase.apache.org by ap...@apache.org on 2014/10/30 04:58:07 UTC

[1/5] git commit: HBASE-10314 Add Chaos Monkey that doesn't touch the master (Elliott Clark)

Repository: hbase
Updated Branches:
  refs/heads/0.98 8e64e1bbe -> ea479051f


HBASE-10314 Add Chaos Monkey that doesn't touch the master (Elliott Clark)

Conflicts:
	hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/MonkeyFactory.java
	hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/StressAssignmentManagerMonkeyFactory.java

Includes license addendum.

Amending-Author: Andrew Purtell <ap...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/5c511f3f
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/5c511f3f
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/5c511f3f

Branch: refs/heads/0.98
Commit: 5c511f3fdd6a482706f80ae7c29bccf00efe7342
Parents: 30d4d5d
Author: Andrew Purtell <ap...@apache.org>
Authored: Wed Oct 29 20:56:57 2014 -0700
Committer: Andrew Purtell <ap...@apache.org>
Committed: Wed Oct 29 20:56:57 2014 -0700

----------------------------------------------------------------------
 .../hbase/chaos/factories/MonkeyFactory.java    |  2 +
 .../StressAssignmentManagerMonkeyFactory.java   | 81 ++++++++++++++++++++
 2 files changed, 83 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/5c511f3f/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/MonkeyFactory.java
----------------------------------------------------------------------
diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/MonkeyFactory.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/MonkeyFactory.java
index 944fe14..25c809f 100644
--- a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/MonkeyFactory.java
+++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/MonkeyFactory.java
@@ -66,6 +66,7 @@ public abstract class MonkeyFactory {
   public static final String SLOW_DETERMINISTIC = "slowDeterministic";
   public static final String UNBALANCE = "unbalance";
   public static final String SERVER_KILLING = "serverKilling";
+  public static final String STRESS_AM = "stressAM";
   public static final String NO_KILL = "noKill";
 
   public static Map<String, MonkeyFactory> FACTORIES = ImmutableMap.<String,MonkeyFactory>builder()
@@ -73,6 +74,7 @@ public abstract class MonkeyFactory {
     .put(SLOW_DETERMINISTIC, new SlowDeterministicMonkeyFactory())
     .put(UNBALANCE, new UnbalanceMonkeyFactory())
     .put(SERVER_KILLING, new ServerKillingMonkeyFactory())
+    .put(STRESS_AM, new StressAssignmentManagerMonkeyFactory())
     .put(NO_KILL, new NoKillMonkeyFactory())
     .build();
 

http://git-wip-us.apache.org/repos/asf/hbase/blob/5c511f3f/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/StressAssignmentManagerMonkeyFactory.java
----------------------------------------------------------------------
diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/StressAssignmentManagerMonkeyFactory.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/StressAssignmentManagerMonkeyFactory.java
new file mode 100644
index 0000000..befb2fa
--- /dev/null
+++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/StressAssignmentManagerMonkeyFactory.java
@@ -0,0 +1,81 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.chaos.factories;
+
+import org.apache.hadoop.hbase.chaos.actions.Action;
+import org.apache.hadoop.hbase.chaos.actions.AddColumnAction;
+import org.apache.hadoop.hbase.chaos.actions.BatchRestartRsAction;
+import org.apache.hadoop.hbase.chaos.actions.CompactRandomRegionOfTableAction;
+import org.apache.hadoop.hbase.chaos.actions.CompactTableAction;
+import org.apache.hadoop.hbase.chaos.actions.DumpClusterStatusAction;
+import org.apache.hadoop.hbase.chaos.actions.FlushRandomRegionOfTableAction;
+import org.apache.hadoop.hbase.chaos.actions.FlushTableAction;
+import org.apache.hadoop.hbase.chaos.actions.MergeRandomAdjacentRegionsOfTableAction;
+import org.apache.hadoop.hbase.chaos.actions.MoveRandomRegionOfTableAction;
+import org.apache.hadoop.hbase.chaos.actions.MoveRegionsOfTableAction;
+import org.apache.hadoop.hbase.chaos.actions.RemoveColumnAction;
+import org.apache.hadoop.hbase.chaos.actions.RestartRandomRsAction;
+import org.apache.hadoop.hbase.chaos.actions.RestartRsHoldingMetaAction;
+import org.apache.hadoop.hbase.chaos.actions.RollingBatchRestartRsAction;
+import org.apache.hadoop.hbase.chaos.actions.SplitRandomRegionOfTableAction;
+import org.apache.hadoop.hbase.chaos.monkies.ChaosMonkey;
+import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey;
+import org.apache.hadoop.hbase.chaos.policies.CompositeSequentialPolicy;
+import org.apache.hadoop.hbase.chaos.policies.DoActionsOncePolicy;
+import org.apache.hadoop.hbase.chaos.policies.PeriodicRandomActionPolicy;
+
+public class StressAssignmentManagerMonkeyFactory extends MonkeyFactory {
+  @Override
+  public ChaosMonkey build() {
+
+    // Actions that could slow down region movement.
+    // These could also get regions stuck if there are issues.
+    Action[] actions1 = new Action[] {
+        new CompactTableAction(tableName, 0.5f),
+        new CompactRandomRegionOfTableAction(tableName, 0.6f),
+        new FlushTableAction(tableName),
+        new FlushRandomRegionOfTableAction(tableName)
+    };
+
+    Action[] actions2 = new Action[] {
+        new SplitRandomRegionOfTableAction(tableName),
+        new MergeRandomAdjacentRegionsOfTableAction(tableName),
+        new AddColumnAction(tableName),
+        new RemoveColumnAction(tableName, columnFamilies),
+        new MoveRegionsOfTableAction(800, 1600, tableName),
+        new MoveRandomRegionOfTableAction(800, tableName),
+        new RestartRandomRsAction(60000),
+        new BatchRestartRsAction(5000, 0.5f),
+        new RollingBatchRestartRsAction(5000, 1.0f),
+        new RestartRsHoldingMetaAction(35000)
+    };
+
+    // Action to log more info for debugging
+    Action[] actions3 = new Action[] {
+        new DumpClusterStatusAction()
+    };
+
+    return new PolicyBasedChaosMonkey(util,
+        new PeriodicRandomActionPolicy(90 * 1000, actions1),
+        new CompositeSequentialPolicy(
+            new DoActionsOncePolicy(90 * 1000, actions2),
+            new PeriodicRandomActionPolicy(90 * 1000, actions2)),
+        new PeriodicRandomActionPolicy(90 * 1000, actions3)
+    );
+  }
+}
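
For orientation (not part of the commit): IntegrationTestBase resolves the -m/--monkey option through MonkeyFactory.getFactory, so the new monkey can also be wired up by hand. A minimal sketch, assuming an IntegrationTestingUtility named util and a Set<String> of column families named columnFamilies; the table name is illustrative:

  MonkeyFactory fact = MonkeyFactory.getFactory(MonkeyFactory.STRESS_AM); // "stressAM"
  ChaosMonkey monkey = fact.setUtil(util)
      .setTableName("IntegrationTestIngest")  // illustrative table name
      .setColumnFamilies(columnFamilies)
      .build();
  monkey.start();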


[3/5] git commit: HBASE-12370 Backport IT changes from HBASE-10572 to 0.98

Posted by ap...@apache.org.
HBASE-12370 Backport IT changes from HBASE-10572 to 0.98


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/f0d52fbe
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/f0d52fbe
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/f0d52fbe

Branch: refs/heads/0.98
Commit: f0d52fbe12fded158f761f615f52d4fd8b90ed31
Parents: 8e64e1b
Author: Andrew Purtell <ap...@apache.org>
Authored: Wed Oct 29 20:56:57 2014 -0700
Committer: Andrew Purtell <ap...@apache.org>
Committed: Wed Oct 29 20:56:57 2014 -0700

----------------------------------------------------------------------
 .../hadoop/hbase/IntegrationTestBase.java       |  6 +-
 .../hadoop/hbase/IntegrationTestIngest.java     | 70 ++++++++++++++++----
 .../RestartRandomRsExceptMetaAction.java        | 42 ++++++++++++
 .../actions/RollingBatchRestartRsAction.java    | 50 ++++++++++----
 .../RollingBatchRestartRsExceptMetaAction.java  | 43 ++++++++++++
 .../hbase/chaos/factories/MonkeyFactory.java    |  6 +-
 .../factories/ServerKillingMonkeyFactory.java   | 61 +++++++++++++++++
 .../hadoop/hbase/HBaseTestingUtility.java       | 32 +++++++--
 .../apache/hadoop/hbase/util/LoadTestTool.java  | 63 +++++++++++++-----
 .../hadoop/hbase/util/MultiThreadedReader.java  | 54 +++++++++------
 .../hbase/util/MultiThreadedReaderWithACL.java  |  5 +-
 11 files changed, 362 insertions(+), 70 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/f0d52fbe/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestBase.java
----------------------------------------------------------------------
diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestBase.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestBase.java
index 170e998..b705e79 100644
--- a/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestBase.java
+++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestBase.java
@@ -100,12 +100,15 @@ public abstract class IntegrationTestBase extends AbstractHBaseTool {
     util = getTestingUtil(getConf());
     MonkeyFactory fact = MonkeyFactory.getFactory(monkeyToUse);
     if (fact == null) {
-      // Run with no monkey in distributed context, with real monkey in local test context.
       fact = getDefaultMonkeyFactory();
     }
     monkey = fact.setUtil(util)
                  .setTableName(getTablename())
                  .setColumnFamilies(getColumnFamilies()).build();
+    startMonkey();
+  }
+
+  protected void startMonkey() throws Exception {
     monkey.start();
   }
 
@@ -124,6 +127,7 @@ public abstract class IntegrationTestBase extends AbstractHBaseTool {
     if (this.util == null) {
       if (conf == null) {
         this.util = new IntegrationTestingUtility();
+        this.setConf(util.getConfiguration());
       } else {
         this.util = new IntegrationTestingUtility(conf);
       }

http://git-wip-us.apache.org/repos/asf/hbase/blob/f0d52fbe/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestIngest.java
----------------------------------------------------------------------
diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestIngest.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestIngest.java
index 920a659..10b064a 100644
--- a/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestIngest.java
+++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestIngest.java
@@ -42,21 +42,38 @@ import com.google.common.collect.Sets;
 @Category(IntegrationTests.class)
 public class IntegrationTestIngest extends IntegrationTestBase {
   public static final char HIPHEN = '-';
-  private static final int SERVER_COUNT = 4; // number of slaves for the smallest cluster
+  private static final int SERVER_COUNT = 1; // number of slaves for the smallest cluster
   private static final long DEFAULT_RUN_TIME = 20 * 60 * 1000;
   private static final long JUNIT_RUN_TIME = 10 * 60 * 1000;
 
   /** A soft limit on how long we should run */
-  private static final String RUN_TIME_KEY = "hbase.%s.runtime";
+  protected static final String RUN_TIME_KEY = "hbase.%s.runtime";
+
+  protected static final String NUM_KEYS_PER_SERVER_KEY = "num_keys_per_server";
+  protected static final long DEFAULT_NUM_KEYS_PER_SERVER = 2500;
+
+  protected static final String NUM_WRITE_THREADS_KEY = "num_write_threads";
+  protected static final int DEFAULT_NUM_WRITE_THREADS = 20;
+
+  protected static final String NUM_READ_THREADS_KEY = "num_read_threads";
+  protected static final int DEFAULT_NUM_READ_THREADS = 20;
 
   protected static final Log LOG = LogFactory.getLog(IntegrationTestIngest.class);
   protected IntegrationTestingUtility util;
   protected HBaseCluster cluster;
   protected LoadTestTool loadTool;
 
+  protected String[] LOAD_TEST_TOOL_INIT_ARGS = {
+      LoadTestTool.OPT_COMPRESSION,
+      LoadTestTool.OPT_DATA_BLOCK_ENCODING,
+      LoadTestTool.OPT_INMEMORY,
+      LoadTestTool.OPT_ENCRYPTION,
+      LoadTestTool.OPT_NUM_REGIONS_PER_SERVER,
+  };
+
   @Override
   public void setUpCluster() throws Exception {
-    util = getTestingUtil(null);
+    util = getTestingUtil(getConf());
     LOG.debug("Initializing/checking cluster has " + SERVER_COUNT + " servers");
     util.initializeCluster(SERVER_COUNT);
     LOG.debug("Done initializing/checking cluster");
@@ -70,7 +87,7 @@ public class IntegrationTestIngest extends IntegrationTestBase {
   }
 
   protected void initTable() throws IOException {
-    int ret = loadTool.run(new String[] { "-tn", getTablename(), "-init_only" });
+    int ret = loadTool.run(getArgsForLoadTestToolInitTable());
     Assert.assertEquals("Failed to initialize LoadTestTool", 0, ret);
   }
 
@@ -82,16 +99,24 @@ public class IntegrationTestIngest extends IntegrationTestBase {
 
   @Test
   public void testIngest() throws Exception {
-    runIngestTest(JUNIT_RUN_TIME, 2500, 10, 1024, 10);
+    runIngestTest(JUNIT_RUN_TIME, 2500, 10, 1024, 10, 20);
   }
 
-  private void internalRunIngestTest(long runTime) throws Exception {
-    runIngestTest(runTime, 2500, 10, 1024, 10);
+  protected void internalRunIngestTest(long runTime) throws Exception {
+    String clazz = this.getClass().getSimpleName();
+    long numKeysPerServer = conf.getLong(String.format("%s.%s", clazz, NUM_KEYS_PER_SERVER_KEY),
+      DEFAULT_NUM_KEYS_PER_SERVER);
+    int numWriteThreads = conf.getInt(
+      String.format("%s.%s", clazz, NUM_WRITE_THREADS_KEY), DEFAULT_NUM_WRITE_THREADS);
+    int numReadThreads = conf.getInt(
+      String.format("%s.%s", clazz, NUM_READ_THREADS_KEY), DEFAULT_NUM_READ_THREADS);
+    runIngestTest(runTime, numKeysPerServer, 10, 1024, numWriteThreads, numReadThreads);
   }
 
   @Override
   public String getTablename() {
-    return this.getClass().getSimpleName();
+    String clazz = this.getClass().getSimpleName();
+    return conf.get(String.format("%s.%s", clazz, LoadTestTool.OPT_TABLE_NAME), clazz);
   }
 
   @Override
@@ -104,8 +129,10 @@ public class IntegrationTestIngest extends IntegrationTestBase {
       util.deleteTable(Bytes.toBytes(getTablename()));
     }
   }
-  protected void runIngestTest(long defaultRunTime, int keysPerServerPerIter, int colsPerKey,
-      int recordSize, int writeThreads) throws Exception {
+
+  protected void runIngestTest(long defaultRunTime, long keysPerServerPerIter, int colsPerKey,
+      int recordSize, int writeThreads, int readThreads) throws Exception {
+
     LOG.info("Running ingest");
     LOG.info("Cluster size:" + util.getHBaseClusterInterface().getClusterStatus().getServersSize());
 
@@ -136,7 +163,8 @@ public class IntegrationTestIngest extends IntegrationTestBase {
         Assert.fail(errorMsg);
       }
 
-      ret = loadTool.run(getArgsForLoadTestTool("-read", "100:20", startKey, numKeys));
+      ret = loadTool.run(getArgsForLoadTestTool("-read", String.format("100:%d", readThreads)
+        , startKey, numKeys));
       if (0 != ret) {
         String errorMsg = "Verification failed with error code " + ret;
         LOG.error(errorMsg);
@@ -146,6 +174,23 @@ public class IntegrationTestIngest extends IntegrationTestBase {
     }
   }
 
+  protected String[] getArgsForLoadTestToolInitTable() {
+    List<String> args = new ArrayList<String>();
+    args.add("-tn");
+    args.add(getTablename());
+    // pass all remaining args from conf with keys <test class name>.<load test tool arg>
+    String clazz = this.getClass().getSimpleName();
+    for (String arg : LOAD_TEST_TOOL_INIT_ARGS) {
+      String val = conf.get(String.format("%s.%s", clazz, arg));
+      if (val != null) {
+        args.add("-" + arg);
+        args.add(val);
+      }
+    }
+    args.add("-init_only");
+    return args.toArray(new String[args.size()]);
+  }
+
   protected String[] getArgsForLoadTestTool(String mode, String modeSpecificArg, long startKey,
       long numKeys) {
     List<String> args = new ArrayList<String>();
@@ -158,11 +203,12 @@ public class IntegrationTestIngest extends IntegrationTestBase {
     args.add("-num_keys");
     args.add(String.valueOf(numKeys));
     args.add("-skip_init");
+
     return args.toArray(new String[args.size()]);
   }
 
   /** Estimates a data size based on the cluster size */
-  private long getNumKeys(int keysPerServer)
+  protected long getNumKeys(long keysPerServer)
       throws IOException {
     int numRegionServers = cluster.getClusterStatus().getServersSize();
     return keysPerServer * numRegionServers;
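
A hedged aside on the new knobs (not part of the commit): the keys are read as <SimpleClassName>.<key>, so for IntegrationTestIngest itself they would be set like this; the values are illustrative:

  Configuration conf = HBaseConfiguration.create();
  conf.setLong("IntegrationTestIngest.num_keys_per_server", 5000); // NUM_KEYS_PER_SERVER_KEY
  conf.setInt("IntegrationTestIngest.num_write_threads", 30);      // NUM_WRITE_THREADS_KEY
  conf.setInt("IntegrationTestIngest.num_read_threads", 30);       // NUM_READ_THREADS_KEY
  conf.set("IntegrationTestIngest.tn", "myIngestTable");           // LoadTestTool.OPT_TABLE_NAME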

http://git-wip-us.apache.org/repos/asf/hbase/blob/f0d52fbe/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RestartRandomRsExceptMetaAction.java
----------------------------------------------------------------------
diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RestartRandomRsExceptMetaAction.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RestartRandomRsExceptMetaAction.java
new file mode 100644
index 0000000..b78144a
--- /dev/null
+++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RestartRandomRsExceptMetaAction.java
@@ -0,0 +1,42 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.chaos.actions;
+
+import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey;
+
+public class RestartRandomRsExceptMetaAction extends RestartRandomRsAction {
+  public RestartRandomRsExceptMetaAction(long sleepTime) {
+    super(sleepTime);
+  }
+
+  @Override
+  public void perform() throws Exception {
+    int tries = 10;
+
+    while (tries-- > 0 && getCurrentServers().length > 1) {
+      ServerName server = PolicyBasedChaosMonkey.selectRandomItem(getCurrentServers());
+      ServerName metaServer = cluster.getServerHoldingMeta();
+      if (server != null && !server.equals(metaServer)) {
+        restartRs(server, sleepTime);
+        break;
+      }
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/hbase/blob/f0d52fbe/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RollingBatchRestartRsAction.java
----------------------------------------------------------------------
diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RollingBatchRestartRsAction.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RollingBatchRestartRsAction.java
index 2125529..4d0cc6b 100644
--- a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RollingBatchRestartRsAction.java
+++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RollingBatchRestartRsAction.java
@@ -32,37 +32,57 @@ import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey;
 
 /**
  * Restarts a ratio of the regionservers in a rolling fashion. At each step, either kills a
- * server, or starts one, sleeping randomly (0-sleepTime) in between steps.
+ * server, or starts one, sleeping randomly (0-sleepTime) in between steps. The parameter maxDeadServers
+ * limits the maximum number of servers that can be down at the same time during rolling restarts.
  */
 public class RollingBatchRestartRsAction extends BatchRestartRsAction {
   private static Log LOG = LogFactory.getLog(RollingBatchRestartRsAction.class);
+  protected int maxDeadServers; // number of maximum dead servers at any given time. Defaults to 5
 
   public RollingBatchRestartRsAction(long sleepTime, float ratio) {
+    this(sleepTime, ratio, 5);
+  }
+
+  public RollingBatchRestartRsAction(long sleepTime, float ratio, int maxDeadServers) {
     super(sleepTime, ratio);
+    this.maxDeadServers = maxDeadServers;
+  }
+
+  enum KillOrStart {
+    KILL,
+    START
   }
 
   @Override
   public void perform() throws Exception {
     LOG.info(String.format("Performing action: Rolling batch restarting %d%% of region servers",
         (int)(ratio * 100)));
-    List<ServerName> selectedServers = PolicyBasedChaosMonkey.selectRandomItems(getCurrentServers(),
-        ratio);
+    List<ServerName> selectedServers = selectServers();
 
     Queue<ServerName> serversToBeKilled = new LinkedList<ServerName>(selectedServers);
     Queue<ServerName> deadServers = new LinkedList<ServerName>();
 
-    //
+    // loop while there are servers to be killed or dead servers to be restarted
     while (!serversToBeKilled.isEmpty() || !deadServers.isEmpty()) {
-      boolean action = true; //action true = kill server, false = start server
+      KillOrStart action = KillOrStart.KILL;
 
-      if (serversToBeKilled.isEmpty() || deadServers.isEmpty()) {
-        action = deadServers.isEmpty();
+      if (serversToBeKilled.isEmpty()) { // no more servers to kill
+        action = KillOrStart.START;
+      } else if (deadServers.isEmpty()) {
+        action = KillOrStart.KILL; // no more servers to start
+      } else if (deadServers.size() >= maxDeadServers) {
+        // we have too many dead servers. Don't kill any more
+        action = KillOrStart.START;
       } else {
-        action = RandomUtils.nextBoolean();
+        // do a coin toss
+        action = RandomUtils.nextBoolean() ? KillOrStart.KILL : KillOrStart.START;
       }
 
-      if (action) {
-        ServerName server = serversToBeKilled.remove();
+      ServerName server;
+
+      switch (action) {
+      case KILL:
+         server = serversToBeKilled.remove();
         try {
           killRs(server);
         } catch (org.apache.hadoop.util.Shell.ExitCodeException e) {
@@ -71,21 +91,27 @@ public class RollingBatchRestartRsAction extends BatchRestartRsAction {
           LOG.info("Problem killing but presume successful; code=" + e.getExitCode(), e);
         }
         deadServers.add(server);
-      } else {
+        break;
+      case START:
         try {
-          ServerName server = deadServers.remove();
+          server = deadServers.remove();
           startRs(server);
         } catch (org.apache.hadoop.util.Shell.ExitCodeException e) {
           // The start may fail but better to just keep going though we may lose server.
           //
           LOG.info("Problem starting, will retry; code=" + e.getExitCode(), e);
         }
+        break;
       }
 
       sleep(RandomUtils.nextInt((int)sleepTime));
     }
   }
 
+  protected List<ServerName> selectServers() throws IOException {
+    return PolicyBasedChaosMonkey.selectRandomItems(getCurrentServers(), ratio);
+  }
+
   /**
    * Small test to ensure the class basically works.
    * @param args
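
To make the bounded kill/start loop above easier to follow, here is a self-contained simulation of the same decision logic (a sketch, not part of the commit; plain strings stand in for ServerName, and the kills and restarts are just prints):

  import java.util.LinkedList;
  import java.util.Queue;
  import java.util.Random;

  public class RollingRestartSketch {
    public static void main(String[] args) {
      Queue<String> toKill = new LinkedList<String>();
      for (int i = 0; i < 6; i++) toKill.add("rs" + i);
      Queue<String> dead = new LinkedList<String>();
      int maxDeadServers = 2; // cap on concurrently dead servers
      Random rnd = new Random();
      while (!toKill.isEmpty() || !dead.isEmpty()) {
        boolean kill;
        if (toKill.isEmpty()) {
          kill = false;                       // nothing left to kill
        } else if (dead.isEmpty()) {
          kill = true;                        // nothing to restart yet
        } else if (dead.size() >= maxDeadServers) {
          kill = false;                       // too many servers down already
        } else {
          kill = rnd.nextBoolean();           // coin toss
        }
        if (kill) {
          String s = toKill.remove();
          System.out.println("kill  " + s);
          dead.add(s);
        } else {
          System.out.println("start " + dead.remove());
        }
      }
    }
  }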

http://git-wip-us.apache.org/repos/asf/hbase/blob/f0d52fbe/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RollingBatchRestartRsExceptMetaAction.java
----------------------------------------------------------------------
diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RollingBatchRestartRsExceptMetaAction.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RollingBatchRestartRsExceptMetaAction.java
new file mode 100644
index 0000000..f03b8ec
--- /dev/null
+++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RollingBatchRestartRsExceptMetaAction.java
@@ -0,0 +1,43 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.chaos.actions;
+
+import java.util.List;
+
+import org.apache.hadoop.hbase.ServerName;
+
+/**
+ * Same as in {@link RollingBatchRestartRsAction} except that this action
+ * does not restart the region server holding the META table.
+ */
+public class RollingBatchRestartRsExceptMetaAction extends RollingBatchRestartRsAction {
+
+  public RollingBatchRestartRsExceptMetaAction(long sleepTime, float ratio, int maxDeadServers) {
+    super(sleepTime, ratio, maxDeadServers);
+  }
+
+  @Override
+  protected List<ServerName> selectServers() throws java.io.IOException {
+    ServerName metaServer = cluster.getServerHoldingMeta();
+    List<ServerName> servers = super.selectServers();
+    servers.remove(metaServer);
+    return servers;
+  };
+
+}

http://git-wip-us.apache.org/repos/asf/hbase/blob/f0d52fbe/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/MonkeyFactory.java
----------------------------------------------------------------------
diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/MonkeyFactory.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/MonkeyFactory.java
index 8f5e610..0ae5d1d 100644
--- a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/MonkeyFactory.java
+++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/MonkeyFactory.java
@@ -21,10 +21,11 @@ package org.apache.hadoop.hbase.chaos.factories;
 import java.util.Map;
 import java.util.Set;
 
-import com.google.common.collect.ImmutableMap;
 import org.apache.hadoop.hbase.IntegrationTestingUtility;
 import org.apache.hadoop.hbase.chaos.monkies.ChaosMonkey;
 
+import com.google.common.collect.ImmutableMap;
+
 /**
  * Base class of the factory that will create a ChaosMonkey.
  */
@@ -51,17 +52,18 @@ public abstract class MonkeyFactory {
 
   public abstract ChaosMonkey build();
 
-
   public static final String CALM = "calm";
   // TODO: the name has become a misnomer since the default (not-slow) monkey has been removed
   public static final String SLOW_DETERMINISTIC = "slowDeterministic";
   public static final String UNBALANCE = "unbalance";
+  public static final String SERVER_KILLING = "serverKilling";
   public static final String NO_KILL = "noKill";
 
   public static Map<String, MonkeyFactory> FACTORIES = ImmutableMap.<String,MonkeyFactory>builder()
     .put(CALM, new CalmMonkeyFactory())
     .put(SLOW_DETERMINISTIC, new SlowDeterministicMonkeyFactory())
     .put(UNBALANCE, new UnbalanceMonkeyFactory())
+    .put(SERVER_KILLING, new ServerKillingMonkeyFactory())
     .put(NO_KILL, new NoKillMonkeyFactory())
     .build();
 

http://git-wip-us.apache.org/repos/asf/hbase/blob/f0d52fbe/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/ServerKillingMonkeyFactory.java
----------------------------------------------------------------------
diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/ServerKillingMonkeyFactory.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/ServerKillingMonkeyFactory.java
new file mode 100644
index 0000000..02b5914
--- /dev/null
+++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/ServerKillingMonkeyFactory.java
@@ -0,0 +1,61 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.chaos.factories;
+
+import org.apache.hadoop.hbase.chaos.actions.Action;
+import org.apache.hadoop.hbase.chaos.actions.DumpClusterStatusAction;
+import org.apache.hadoop.hbase.chaos.actions.ForceBalancerAction;
+import org.apache.hadoop.hbase.chaos.actions.RestartActiveMasterAction;
+import org.apache.hadoop.hbase.chaos.actions.RestartRandomRsExceptMetaAction;
+import org.apache.hadoop.hbase.chaos.actions.RollingBatchRestartRsExceptMetaAction;
+import org.apache.hadoop.hbase.chaos.monkies.ChaosMonkey;
+import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey;
+import org.apache.hadoop.hbase.chaos.policies.CompositeSequentialPolicy;
+import org.apache.hadoop.hbase.chaos.policies.DoActionsOncePolicy;
+import org.apache.hadoop.hbase.chaos.policies.PeriodicRandomActionPolicy;
+
+/**
+ * Creates ChaosMonkeys for doing server restart actions, but not
+ * flush / compact / snapshot kind of actions.
+ */
+public class ServerKillingMonkeyFactory extends MonkeyFactory {
+
+  @Override
+  public ChaosMonkey build() {
+
+    // Destructive actions to mess things around. Cannot run batch restart
+    Action[] actions1 = new Action[] {
+        new RestartRandomRsExceptMetaAction(60000),
+        new RestartActiveMasterAction(5000),
+        new RollingBatchRestartRsExceptMetaAction(5000, 1.0f, 2), //only allow 2 servers to be dead
+        new ForceBalancerAction()
+    };
+
+    // Action to log more info for debugging
+    Action[] actions2 = new Action[] {
+        new DumpClusterStatusAction()
+    };
+
+    return new PolicyBasedChaosMonkey(util,
+      new CompositeSequentialPolicy(
+          new DoActionsOncePolicy(60 * 1000, actions1),
+          new PeriodicRandomActionPolicy(60 * 1000, actions1)),
+      new PeriodicRandomActionPolicy(60 * 1000, actions2));
+  }
+}

http://git-wip-us.apache.org/repos/asf/hbase/blob/f0d52fbe/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java
index 021b43a..2bd0abc 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java
@@ -3251,11 +3251,26 @@ public class HBaseTestingUtility extends HBaseCommonTestingUtility {
   public static int createPreSplitLoadTestTable(Configuration conf,
       TableName tableName, byte[] columnFamily, Algorithm compression,
       DataBlockEncoding dataBlockEncoding) throws IOException {
+    return createPreSplitLoadTestTable(conf, tableName,
+      columnFamily, compression, dataBlockEncoding, DEFAULT_REGIONS_PER_SERVER,
+      Durability.USE_DEFAULT);
+  }
+  /**
+   * Creates a pre-split table for load testing. If the table already exists,
+   * logs a warning and continues.
+   * @return the number of regions the table was split into
+   */
+  public static int createPreSplitLoadTestTable(Configuration conf,
+      TableName tableName, byte[] columnFamily, Algorithm compression,
+      DataBlockEncoding dataBlockEncoding, int numRegionsPerServer,
+      Durability durability)
+          throws IOException {
     HTableDescriptor desc = new HTableDescriptor(tableName);
+    desc.setDurability(durability);
     HColumnDescriptor hcd = new HColumnDescriptor(columnFamily);
     hcd.setDataBlockEncoding(dataBlockEncoding);
     hcd.setCompressionType(compression);
-    return createPreSplitLoadTestTable(conf, desc, hcd);
+    return createPreSplitLoadTestTable(conf, desc, hcd, numRegionsPerServer);
   }
 
   /**
@@ -3265,6 +3280,16 @@ public class HBaseTestingUtility extends HBaseCommonTestingUtility {
    */
   public static int createPreSplitLoadTestTable(Configuration conf,
       HTableDescriptor desc, HColumnDescriptor hcd) throws IOException {
+    return createPreSplitLoadTestTable(conf, desc, hcd, DEFAULT_REGIONS_PER_SERVER);
+  }
+
+  /**
+   * Creates a pre-split table for load testing. If the table already exists,
+   * logs a warning and continues.
+   * @return the number of regions the table was split into
+   */
+  public static int createPreSplitLoadTestTable(Configuration conf,
+      HTableDescriptor desc, HColumnDescriptor hcd, int numRegionsPerServer) throws IOException {
     if (!desc.hasFamily(hcd.getName())) {
       desc.addFamily(hcd);
     }
@@ -3280,11 +3305,10 @@ public class HBaseTestingUtility extends HBaseCommonTestingUtility {
         throw new IllegalStateException("No live regionservers");
       }
 
-      int regionsPerServer = conf.getInt(REGIONS_PER_SERVER_KEY, DEFAULT_REGIONS_PER_SERVER);
-      totalNumberOfRegions = numberOfServers * regionsPerServer;
+      totalNumberOfRegions = numberOfServers * numRegionsPerServer;
       LOG.info("Number of live regionservers: " + numberOfServers + ", " +
           "pre-splitting table into " + totalNumberOfRegions + " regions " +
-          "(default regions per server: " + regionsPerServer + ")");
+          "(regions per server: " + numRegionsPerServer + ")");
 
       byte[][] splits = new RegionSplitter.HexStringSplit().split(
           totalNumberOfRegions);
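
A hedged usage sketch of the new overload (not part of the commit; the table name, family, region count, and durability choice are illustrative):

  import java.io.IOException;
  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.hbase.HBaseConfiguration;
  import org.apache.hadoop.hbase.HBaseTestingUtility;
  import org.apache.hadoop.hbase.TableName;
  import org.apache.hadoop.hbase.client.Durability;
  import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
  import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
  import org.apache.hadoop.hbase.util.Bytes;

  public class PreSplitExample {
    public static void main(String[] args) throws IOException {
      Configuration conf = HBaseConfiguration.create();
      // 10 regions per live region server, ASYNC_WAL durability (illustrative values)
      int regions = HBaseTestingUtility.createPreSplitLoadTestTable(conf,
          TableName.valueOf("loadtest"), Bytes.toBytes("test_cf"),
          Algorithm.NONE, DataBlockEncoding.NONE, 10, Durability.ASYNC_WAL);
      System.out.println("Split into " + regions + " regions");
    }
  }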

http://git-wip-us.apache.org/repos/asf/hbase/blob/f0d52fbe/hbase-server/src/test/java/org/apache/hadoop/hbase/util/LoadTestTool.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/LoadTestTool.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/LoadTestTool.java
index 0efc7d3..b318780 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/LoadTestTool.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/LoadTestTool.java
@@ -110,10 +110,11 @@ public class LoadTestTool extends AbstractHBaseTool {
         + "compression) to use for data blocks in the test column family, "
         + "one of " + Arrays.toString(DataBlockEncoding.values()) + ".";
 
-  private static final String OPT_BLOOM = "bloom";
-  private static final String OPT_COMPRESSION = "compression";
-  private static final String OPT_DEFERRED_LOG_FLUSH = "deferredlogflush";
+  public static final String OPT_BLOOM = "bloom";
+  public static final String OPT_COMPRESSION = "compression";
+  public static final String OPT_DEFERRED_LOG_FLUSH = "deferredlogflush";
   public static final String OPT_DEFERRED_LOG_FLUSH_USAGE = "Enable deferred log flush.";
+
   public static final String OPT_DATA_BLOCK_ENCODING =
       HColumnDescriptor.DATA_BLOCK_ENCODING.toLowerCase();
 
@@ -125,6 +126,9 @@ public class LoadTestTool extends AbstractHBaseTool {
   public static final String OPT_GENERATOR_USAGE = "The class which generates load for the tool."
       + " Any args for this class can be passed as colon separated after class name";
 
+  public static final String OPT_READER = "reader";
+  public static final String OPT_READER_USAGE = "The class for executing the read requests";
+
   protected static final String OPT_KEY_WINDOW = "key_window";
   protected static final String OPT_WRITE = "write";
   protected static final String OPT_MAX_READ_ERRORS = "max_read_errors";
@@ -132,7 +136,7 @@ public class LoadTestTool extends AbstractHBaseTool {
   protected static final String OPT_NUM_KEYS = "num_keys";
   protected static final String OPT_READ = "read";
   protected static final String OPT_START_KEY = "start_key";
-  protected static final String OPT_TABLE_NAME = "tn";
+  public static final String OPT_TABLE_NAME = "tn";
   protected static final String OPT_ZK_QUORUM = "zk";
   protected static final String OPT_ZK_PARENT_NODE = "zk_root";
   protected static final String OPT_SKIP_INIT = "skip_init";
@@ -142,11 +146,16 @@ public class LoadTestTool extends AbstractHBaseTool {
   protected static final String OPT_BATCHUPDATE = "batchupdate";
   protected static final String OPT_UPDATE = "update";
 
-  protected static final String OPT_ENCRYPTION = "encryption";
+  public static final String OPT_ENCRYPTION = "encryption";
   protected static final String OPT_ENCRYPTION_USAGE =
     "Enables transparent encryption on the test table, one of " +
     Arrays.toString(Encryption.getSupportedCiphers());
 
+  public static final String OPT_NUM_REGIONS_PER_SERVER = "num_regions_per_server";
+  protected static final String OPT_NUM_REGIONS_PER_SERVER_USAGE
+    = "Desired number of regions per region server. Defaults to 5.";
+  protected static int DEFAULT_NUM_REGIONS_PER_SERVER = 5;
+
   protected static final long DEFAULT_START_KEY = 0;
 
   /** This will be removed as we factor out the dependency on command line */
@@ -292,6 +301,7 @@ public class LoadTestTool extends AbstractHBaseTool {
         "separate updates for every column in a row");
     addOptNoArg(OPT_INMEMORY, OPT_USAGE_IN_MEMORY);
     addOptWithArg(OPT_GENERATOR, OPT_GENERATOR_USAGE);
+    addOptWithArg(OPT_READER, OPT_READER_USAGE);
 
     addOptWithArg(OPT_NUM_KEYS, "The number of keys to read/write");
     addOptWithArg(OPT_START_KEY, "The first key to read/write " +
@@ -311,6 +321,7 @@ public class LoadTestTool extends AbstractHBaseTool {
 
     addOptWithArg(OPT_ENCRYPTION, OPT_ENCRYPTION_USAGE);
     addOptNoArg(OPT_DEFERRED_LOG_FLUSH, OPT_DEFERRED_LOG_FLUSH_USAGE);
+    addOptWithArg(OPT_NUM_REGIONS_PER_SERVER, OPT_NUM_REGIONS_PER_SERVER_USAGE);
   }
 
   @Override
@@ -421,13 +432,11 @@ public class LoadTestTool extends AbstractHBaseTool {
     if (cmd.hasOption(NUM_TABLES)) {
       numTables = parseInt(cmd.getOptionValue(NUM_TABLES), 1, Short.MAX_VALUE);
     }
-    regionsPerServer = HBaseTestingUtility.DEFAULT_REGIONS_PER_SERVER;
-    if (cmd.hasOption(OPT_REGIONS_PER_SERVER)) {
-      regionsPerServer = parseInt(cmd.getOptionValue(OPT_REGIONS_PER_SERVER), 1,
-        Integer.MAX_VALUE);
-      conf.setInt(HBaseTestingUtility.REGIONS_PER_SERVER_KEY, regionsPerServer);
+
+    regionsPerServer = DEFAULT_NUM_REGIONS_PER_SERVER;
+    if (cmd.hasOption(OPT_NUM_REGIONS_PER_SERVER)) {
+      regionsPerServer = Integer.parseInt(cmd.getOptionValue(OPT_NUM_REGIONS_PER_SERVER));
     }
-    System.out.println("Regions per server: " + regionsPerServer);
   }
 
   private void parseColumnFamilyOptions(CommandLine cmd) {
@@ -451,14 +460,14 @@ public class LoadTestTool extends AbstractHBaseTool {
   }
 
   public void initTestTable() throws IOException {
-    HTableDescriptor desc = new HTableDescriptor(tableName);
+    Durability durability = Durability.USE_DEFAULT;
     if (deferredLogFlush) {
-      desc.setDurability(Durability.ASYNC_WAL);
+      durability = Durability.ASYNC_WAL;
     }
-    HColumnDescriptor hcd = new HColumnDescriptor(COLUMN_FAMILY);
-    hcd.setDataBlockEncoding(dataBlockEncodingAlgo);
-    hcd.setCompressionType(compressAlgo);
-    HBaseTestingUtility.createPreSplitLoadTestTable(conf, desc, hcd);
+
+    HBaseTestingUtility.createPreSplitLoadTestTable(conf, tableName,
+        COLUMN_FAMILY, compressAlgo, dataBlockEncodingAlgo, regionsPerServer,
+        durability);
     applyColumnFamilyOptions(tableName, COLUMN_FAMILIES);
   }
 
@@ -577,7 +586,13 @@ public class LoadTestTool extends AbstractHBaseTool {
         readerThreads = new MultiThreadedReaderWithACL(dataGen, conf, tableName, verifyPercent,
             userNames);
       } else {
-        readerThreads = new MultiThreadedReader(dataGen, conf, tableName, verifyPercent);
+        String readerClass = null;
+        if (cmd.hasOption(OPT_READER)) {
+          readerClass = cmd.getOptionValue(OPT_READER);
+        } else {
+          readerClass = MultiThreadedReader.class.getCanonicalName();
+        }
+        readerThreads = getMultiThreadedReaderInstance(readerClass, dataGen);
       }
       readerThreads.setMaxErrors(maxReadErrors);
       readerThreads.setKeyWindow(keyWindow);
@@ -651,6 +666,18 @@ public class LoadTestTool extends AbstractHBaseTool {
     }
   }
 
+  private MultiThreadedReader getMultiThreadedReaderInstance(String clazzName
+      , LoadTestDataGenerator dataGen) throws IOException {
+    try {
+      Class<?> clazz = Class.forName(clazzName);
+      Constructor<?> constructor = clazz.getConstructor(
+        LoadTestDataGenerator.class, Configuration.class, TableName.class, double.class);
+      return (MultiThreadedReader) constructor.newInstance(dataGen, conf, tableName, verifyPercent);
+    } catch (Exception e) {
+      throw new IOException(e);
+    }
+  }
+
   public static byte[] generateData(final Random r, int length) {
     byte [] b = new byte [length];
     int i = 0;
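
A hedged illustration of the two new options, driven the same way IntegrationTestIngest drives the tool (not part of the commit; the table name, sizes, and thread counts are made up, and the -write value is assumed to be colsPerKey:recordSize:writeThreads):

  LoadTestTool loadTool = new LoadTestTool();
  int ret = loadTool.run(new String[] {
      "-tn", "loadtest",
      "-num_regions_per_server", "10",   // new OPT_NUM_REGIONS_PER_SERVER
      "-write", "10:1024:20",
      "-num_keys", "100000"
  });
  ret = loadTool.run(new String[] {
      "-tn", "loadtest",
      "-reader", "org.apache.hadoop.hbase.util.MultiThreadedReader", // new OPT_READER
      "-read", "100:20",
      "-num_keys", "100000",
      "-skip_init"
  });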

http://git-wip-us.apache.org/repos/asf/hbase/blob/f0d52fbe/hbase-server/src/test/java/org/apache/hadoop/hbase/util/MultiThreadedReader.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/MultiThreadedReader.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/MultiThreadedReader.java
index 0edeea7..b0d44fd 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/MultiThreadedReader.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/MultiThreadedReader.java
@@ -39,7 +39,7 @@ public class MultiThreadedReader extends MultiThreadedAction
 
   protected Set<HBaseReaderThread> readers = new HashSet<HBaseReaderThread>();
   private final double verifyPercent;
-  private volatile boolean aborted;
+  protected volatile boolean aborted;
 
   protected MultiThreadedWriterBase writer = null;
 
@@ -104,11 +104,15 @@ public class MultiThreadedReader extends MultiThreadedAction
 
   protected void addReaderThreads(int numThreads) throws IOException {
     for (int i = 0; i < numThreads; ++i) {
-      HBaseReaderThread reader = new HBaseReaderThread(i);
+      HBaseReaderThread reader = createReaderThread(i);
       readers.add(reader);
     }
   }
 
+  protected HBaseReaderThread createReaderThread(int readerId) throws IOException {
+    return new HBaseReaderThread(readerId);
+  }
+
   public class HBaseReaderThread extends Thread {
     protected final int readerId;
     protected final HTable table;
@@ -122,6 +126,8 @@ public class MultiThreadedReader extends MultiThreadedAction
     /** If we are ahead of the writer and reading a random key. */
     private boolean readingRandomKey;
 
+    private boolean printExceptionTrace = true;
+
     /**
      * @param readerId only the keys with this remainder from division by
      *          {@link #numThreads} will be read by this thread
@@ -204,7 +210,7 @@ public class MultiThreadedReader extends MultiThreadedAction
       return Math.min(endKey - 1, writer.wroteUpToKey() - keyWindow);
     }
 
-    private long getNextKeyToRead() {
+    protected long getNextKeyToRead() {
       readingRandomKey = false;
       if (writer == null || curKey <= maxKeyWeCanRead()) {
         return curKey++;
@@ -235,6 +241,24 @@ public class MultiThreadedReader extends MultiThreadedAction
     }
 
     private Get readKey(long keyToRead) {
+      Get get = null;
+      try {
+        get = createGet(keyToRead);
+        queryKey(get, RandomUtils.nextInt(100) < verifyPercent, keyToRead);
+      } catch (IOException e) {
+        numReadFailures.addAndGet(1);
+        LOG.debug("[" + readerId + "] FAILED read, key = " + (keyToRead + "")
+            + ", time from start: "
+            + (System.currentTimeMillis() - startTimeMs) + " ms");
+        if (printExceptionTrace) {
+          LOG.warn(e);
+          printExceptionTrace = false;
+        }
+      }
+      return get;
+    }
+
+    protected Get createGet(long keyToRead) throws IOException {
       Get get = new Get(dataGenerator.getDeterministicUniqueKey(keyToRead));
       String cfsString = "";
       byte[][] columnFamilies = dataGenerator.getColumnFamilies();
@@ -247,18 +271,9 @@ public class MultiThreadedReader extends MultiThreadedAction
           cfsString += "[" + Bytes.toStringBinary(cf) + "]";
         }
       }
-
-      try {
-        get = dataGenerator.beforeGet(keyToRead, get);
-        if (verbose) {
-          LOG.info("[" + readerId + "] " + "Querying key " + keyToRead + ", cfs " + cfsString);
-        }
-        queryKey(get, RandomUtils.nextInt(100) < verifyPercent, keyToRead);
-      } catch (IOException e) {
-        numReadFailures.addAndGet(1);
-        LOG.debug("[" + readerId + "] FAILED read, key = " + (keyToRead + "")
-            + ", time from start: "
-            + (System.currentTimeMillis() - startTimeMs) + " ms");
+      get = dataGenerator.beforeGet(keyToRead, get);
+      if (verbose) {
+        LOG.info("[" + readerId + "] " + "Querying key " + keyToRead + ", cfs " + cfsString);
       }
       return get;
     }
@@ -267,15 +282,16 @@ public class MultiThreadedReader extends MultiThreadedAction
       String rowKey = Bytes.toString(get.getRow());
 
       // read the data
-      long start = System.currentTimeMillis();
+      long start = System.nanoTime();
       Result result = table.get(get);
-      getResultMetricUpdation(verify, rowKey, start, result, table, false);
+      long end = System.nanoTime();
+      verifyResultsAndUpdateMetrics(verify, rowKey, end - start, result, table, false);
     }
 
-    protected void getResultMetricUpdation(boolean verify, String rowKey, long start,
+    protected void verifyResultsAndUpdateMetrics(boolean verify, String rowKey, long elapsedNano,
         Result result, HTable table, boolean isNullExpected)
         throws IOException {
-      totalOpTimeMs.addAndGet(System.currentTimeMillis() - start);
+      totalOpTimeMs.addAndGet(elapsedNano / 1000000);
       numKeys.addAndGet(1);
       if (!result.isEmpty()) {
         if (verify) {
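
The point of widening these modifiers is subclassing. A hedged sketch of a custom reader that the new -reader option could load by reflection (the class and its policy are invented for illustration; it keeps the (LoadTestDataGenerator, Configuration, TableName, double) constructor that LoadTestTool looks up, and is assumed to live in org.apache.hadoop.hbase.util):

  import java.io.IOException;
  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.hbase.TableName;
  import org.apache.hadoop.hbase.util.test.LoadTestDataGenerator;

  public class ExampleCustomReader extends MultiThreadedReader {
    public ExampleCustomReader(LoadTestDataGenerator dataGen, Configuration conf,
        TableName tableName, double verifyPercent) throws IOException {
      super(dataGen, conf, tableName, verifyPercent);
    }

    @Override
    protected HBaseReaderThread createReaderThread(int readerId) throws IOException {
      return new HBaseReaderThread(readerId) {
        @Override
        protected long getNextKeyToRead() {
          // a custom key-selection policy would go here; delegate for the sketch
          return super.getNextKeyToRead();
        }
      };
    }
  }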

http://git-wip-us.apache.org/repos/asf/hbase/blob/f0d52fbe/hbase-server/src/test/java/org/apache/hadoop/hbase/util/MultiThreadedReaderWithACL.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/MultiThreadedReaderWithACL.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/MultiThreadedReaderWithACL.java
index 1aa4870..29fc5f2 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/MultiThreadedReaderWithACL.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/MultiThreadedReaderWithACL.java
@@ -89,7 +89,7 @@ public class MultiThreadedReaderWithACL extends MultiThreadedReader {
       final String rowKey = Bytes.toString(get.getRow());
 
       // read the data
-      final long start = System.currentTimeMillis();
+      final long start = System.nanoTime();
       PrivilegedExceptionAction<Object> action = new PrivilegedExceptionAction<Object>() {
         @Override
         public Object run() throws Exception {
@@ -107,7 +107,8 @@ public class MultiThreadedReaderWithACL extends MultiThreadedReader {
               result = localTable.get(get);
             }
             boolean isNullExpected = ((((int) keyToRead % specialPermCellInsertionFactor)) == 0);
-            getResultMetricUpdation(verify, rowKey, start, result, localTable, isNullExpected);
+            long end = System.nanoTime();
+            verifyResultsAndUpdateMetrics(verify, rowKey, end - start, result, localTable, isNullExpected);
           } catch (IOException e) {
             recordFailure(keyToRead);
           }


[2/5] git commit: HBASE-11348 Make frequency and sleep times of chaos monkeys configurable (Vandan Ayyalasomayajula)

Posted by ap...@apache.org.
HBASE-11348 Make frequency and sleep times of chaos monkeys configurable (Vandan Ayyalasomayajula)

Conflicts:
	hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestBase.java
	hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/NoKillMonkeyFactory.java


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/30d4d5de
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/30d4d5de
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/30d4d5de

Branch: refs/heads/0.98
Commit: 30d4d5de59dd7211ca5e48e639454424b316818b
Parents: f0d52fb
Author: Michael Stack <st...@apache.org>
Authored: Wed Oct 29 20:56:57 2014 -0700
Committer: Andrew Purtell <ap...@apache.org>
Committed: Wed Oct 29 20:56:57 2014 -0700

----------------------------------------------------------------------
 .../hadoop/hbase/IntegrationTestBase.java       | 26 +++++
 .../chaos/actions/MoveRegionsOfTableAction.java |  7 +-
 .../UnbalanceKillAndRebalanceAction.java        | 19 ++--
 .../hbase/chaos/factories/MonkeyConstants.java  | 64 +++++++++++++
 .../hbase/chaos/factories/MonkeyFactory.java    |  9 ++
 .../chaos/factories/NoKillMonkeyFactory.java    |  2 +-
 .../SlowDeterministicMonkeyFactory.java         | 99 +++++++++++++++++---
 .../chaos/factories/UnbalanceMonkeyFactory.java | 28 +++++-
 .../hadoop/hbase/mttr/IntegrationTestMTTR.java  |  4 +-
 9 files changed, 228 insertions(+), 30 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/30d4d5de/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestBase.java
----------------------------------------------------------------------
diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestBase.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestBase.java
index b705e79..d7daae8 100644
--- a/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestBase.java
+++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestBase.java
@@ -18,9 +18,12 @@
 
 package org.apache.hadoop.hbase;
 
+import java.io.IOException;
+import java.util.Properties;
 import java.util.Set;
 
 import org.apache.commons.cli.CommandLine;
+import org.apache.commons.lang.StringUtils;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
@@ -32,14 +35,21 @@ import org.junit.Before;
 
 /**
  * Base class for HBase integration tests that want to use the Chaos Monkey.
+ * Usage: bin/hbase <sub_class_of_IntegrationTestBase> <options>
+ * Options: -h,--help Show usage
+ *          -m,--monkey <arg> Which chaos monkey to run
+ *          -monkeyProps <arg> The properties file for specifying chaos monkey properties.
+ *          -ncc Option to not clean up the cluster at the end.
  */
 public abstract class IntegrationTestBase extends AbstractHBaseTool {
   public static final String LONG_OPT = "monkey";
+  public static final String CHAOS_MONKEY_PROPS = "monkeyProps";
   private static final Log LOG = LogFactory.getLog(IntegrationTestBase.class);
 
   protected IntegrationTestingUtility util;
   protected ChaosMonkey monkey;
   protected String monkeyToUse;
+  protected Properties monkeyProps;
 
   public IntegrationTestBase() {
     this(null);
@@ -52,6 +62,8 @@ public abstract class IntegrationTestBase extends AbstractHBaseTool {
   @Override
   protected void addOptions() {
     addOptWithArg("m", LONG_OPT, "Which chaos monkey to run");
+    addOptWithArg(CHAOS_MONKEY_PROPS, "The properties file for specifying chaos "
+        + "monkey properties.");
   }
 
   @Override
@@ -59,6 +71,19 @@ public abstract class IntegrationTestBase extends AbstractHBaseTool {
     if (cmd.hasOption(LONG_OPT)) {
       monkeyToUse = cmd.getOptionValue(LONG_OPT);
     }
+    monkeyProps = new Properties();
+    if (cmd.hasOption(CHAOS_MONKEY_PROPS)) {
+      String chaosMonkeyPropsFile = cmd.getOptionValue(CHAOS_MONKEY_PROPS);
+      if (StringUtils.isNotEmpty(chaosMonkeyPropsFile)) {
+        try {
+          monkeyProps.load(this.getClass().getClassLoader()
+              .getResourceAsStream(chaosMonkeyPropsFile));
+        } catch (IOException e) {
+          LOG.warn(e);
+          System.exit(EXIT_FAILURE);
+        }
+      }
+    }
   }
 
   @Override
@@ -104,6 +129,7 @@ public abstract class IntegrationTestBase extends AbstractHBaseTool {
     }
     monkey = fact.setUtil(util)
                  .setTableName(getTablename())
+                 .setProperties(monkeyProps)
                  .setColumnFamilies(getColumnFamilies()).build();
     startMonkey();
   }
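
A hedged sketch of the programmatic equivalent of -monkeyProps (not part of the commit; the property names come from the MonkeyConstants interface added below, while util, columnFamilies, and the values are illustrative):

  Properties monkeyProps = new Properties();
  monkeyProps.setProperty("sdm.action1.period", "60000");     // PERIODIC_ACTION1_PERIOD
  monkeyProps.setProperty("move.regions.max.time", "480000"); // MOVE_REGIONS_MAX_TIME
  ChaosMonkey monkey = MonkeyFactory.getFactory("slowDeterministic")
      .setUtil(util)
      .setTableName("IntegrationTestIngest")
      .setProperties(monkeyProps)
      .setColumnFamilies(columnFamilies)
      .build();
  monkey.start();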

http://git-wip-us.apache.org/repos/asf/hbase/blob/30d4d5de/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/MoveRegionsOfTableAction.java
----------------------------------------------------------------------
diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/MoveRegionsOfTableAction.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/MoveRegionsOfTableAction.java
index 4c5db1e..d40ef07 100644
--- a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/MoveRegionsOfTableAction.java
+++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/MoveRegionsOfTableAction.java
@@ -25,6 +25,7 @@ import java.util.List;
 import org.apache.commons.lang.math.RandomUtils;
 import org.apache.hadoop.hbase.HRegionInfo;
 import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.chaos.factories.MonkeyConstants;
 import org.apache.hadoop.hbase.client.HBaseAdmin;
 import org.apache.hadoop.hbase.util.Bytes;
 
@@ -38,14 +39,14 @@ public class MoveRegionsOfTableAction extends Action {
   private final long maxTime;
 
   public MoveRegionsOfTableAction(String tableName) {
-    this(-1, tableName);
+    this(-1, MonkeyConstants.DEFAULT_MOVE_REGIONS_MAX_TIME, tableName);
   }
 
-  public MoveRegionsOfTableAction(long sleepTime, String tableName) {
+  public MoveRegionsOfTableAction(long sleepTime, long maxSleepTime, String tableName) {
     this.sleepTime = sleepTime;
     this.tableNameBytes = Bytes.toBytes(tableName);
     this.tableName = tableName;
-    this.maxTime = 10 * 60 * 1000; // 10 min default
+    this.maxTime = maxSleepTime;
   }
 
   @Override

http://git-wip-us.apache.org/repos/asf/hbase/blob/30d4d5de/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/UnbalanceKillAndRebalanceAction.java
----------------------------------------------------------------------
diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/UnbalanceKillAndRebalanceAction.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/UnbalanceKillAndRebalanceAction.java
index 540b67a..a97a9c4 100644
--- a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/UnbalanceKillAndRebalanceAction.java
+++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/UnbalanceKillAndRebalanceAction.java
@@ -36,9 +36,16 @@ public class UnbalanceKillAndRebalanceAction extends Action {
   private static final double HOARD_FRC_OF_REGIONS = 0.8;
   /** Waits between calling unbalance and killing servers, kills and rebalance, and rebalance
    * and restarting the servers; to make sure these events have time to impact the cluster. */
-  private static final long WAIT_FOR_UNBALANCE_MS = 2 * 1000;
-  private static final long WAIT_FOR_KILLS_MS = 2 * 1000;
-  private static final long WAIT_AFTER_BALANCE_MS = 5 * 1000;
+  private long waitForUnbalanceMilliSec;
+  private long waitForKillsMilliSec;
+  private long waitAfterBalanceMilliSec;
+
+  public UnbalanceKillAndRebalanceAction(long waitUnbalance, long waitKill, long waitAfterBalance) {
+    super();
+    waitForUnbalanceMilliSec = waitUnbalance;
+    waitForKillsMilliSec = waitKill;
+    waitAfterBalanceMilliSec = waitAfterBalance;
+  }
 
   @Override
   public void perform() throws Exception {
@@ -53,13 +60,13 @@ public class UnbalanceKillAndRebalanceAction extends Action {
       targetServers.add(victimServers.remove(victimIx));
     }
     unbalanceRegions(status, victimServers, targetServers, HOARD_FRC_OF_REGIONS);
-    Thread.sleep(WAIT_FOR_UNBALANCE_MS);
+    Thread.sleep(waitForUnbalanceMilliSec);
     for (int i = 0; i < liveCount; ++i) {
       killRs(targetServers.get(i));
     }
-    Thread.sleep(WAIT_FOR_KILLS_MS);
+    Thread.sleep(waitForKillsMilliSec);
     forceBalancer();
-    Thread.sleep(WAIT_AFTER_BALANCE_MS);
+    Thread.sleep(waitAfterBalanceMilliSec);
     for (int i = 0; i < liveCount; ++i) {
       startRs(targetServers.get(i));
     }

http://git-wip-us.apache.org/repos/asf/hbase/blob/30d4d5de/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/MonkeyConstants.java
----------------------------------------------------------------------
diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/MonkeyConstants.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/MonkeyConstants.java
new file mode 100644
index 0000000..3333b26
--- /dev/null
+++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/MonkeyConstants.java
@@ -0,0 +1,64 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.chaos.factories;
+
+public interface MonkeyConstants {
+
+  public static final String PERIODIC_ACTION1_PERIOD = "sdm.action1.period";
+  public static final String PERIODIC_ACTION2_PERIOD = "sdm.action2.period";
+  public static final String PERIODIC_ACTION4_PERIOD = "sdm.action4.period";
+  public static final String COMPOSITE_ACTION3_PERIOD = "sdm.action3.period";
+  public static final String MOVE_REGIONS_MAX_TIME = "move.regions.max.time";
+  public static final String MOVE_REGIONS_SLEEP_TIME = "move.regions.sleep.time";
+  public static final String MOVE_RANDOM_REGION_SLEEP_TIME = "move.randomregion.sleep.time";
+  public static final String RESTART_RANDOM_RS_SLEEP_TIME = "restart.random.rs.sleep.time";
+  public static final String BATCH_RESTART_RS_SLEEP_TIME = "batch.restart.rs.sleep.time";
+  public static final String BATCH_RESTART_RS_RATIO = "batch.restart.rs.ratio";
+  public static final String RESTART_ACTIVE_MASTER_SLEEP_TIME = "restart.active.master.sleep.time";
+  public static final String ROLLING_BATCH_RESTART_RS_SLEEP_TIME = "rolling.batch.restart.rs.sleep.time";
+  public static final String ROLLING_BATCH_RESTART_RS_RATIO = "rolling.batch.restart.rs.ratio";
+  public static final String RESTART_RS_HOLDING_META_SLEEP_TIME = "restart.rs.holding.meta.sleep.time";
+  public static final String COMPACT_TABLE_ACTION_RATIO = "compact.table.ratio";
+  public static final String COMPACT_RANDOM_REGION_RATIO = "compact.random.region.ratio";
+  public static final String UNBALANCE_CHAOS_EVERY_MS = "unbalance.chaos.period";
+  public static final String UNBALANCE_WAIT_FOR_UNBALANCE_MS = "unbalance.action.wait.period";
+  public static final String UNBALANCE_WAIT_FOR_KILLS_MS = "unbalance.action.kill.period";
+  public static final String UNBALANCE_WAIT_AFTER_BALANCE_MS = "unbalance.action.wait.after.period";
+
+  public static final long DEFAULT_PERIODIC_ACTION1_PERIOD = 60 * 1000;
+  public static final long DEFAULT_PERIODIC_ACTION2_PERIOD = 90 * 1000;
+  public static final long DEFAULT_PERIODIC_ACTION4_PERIOD = 90 * 1000;
+  public static final long DEFAULT_COMPOSITE_ACTION3_PERIOD = 150 * 1000;
+  public static final long DEFAULT_MOVE_REGIONS_MAX_TIME = 10 * 60 * 1000;
+  public static final long DEFAULT_MOVE_REGIONS_SLEEP_TIME = 800;
+  public static final long DEFAULT_MOVE_RANDOM_REGION_SLEEP_TIME = 800;
+  public static final long DEFAULT_RESTART_RANDOM_RS_SLEEP_TIME = 60000;
+  public static final long DEFAULT_BATCH_RESTART_RS_SLEEP_TIME = 5000;
+  public static final float DEFAULT_BATCH_RESTART_RS_RATIO = 0.5f;
+  public static final long DEFAULT_RESTART_ACTIVE_MASTER_SLEEP_TIME = 5000;
+  public static final long DEFAULT_ROLLING_BATCH_RESTART_RS_SLEEP_TIME = 5000;
+  public static final float DEFAULT_ROLLING_BATCH_RESTART_RS_RATIO = 1.0f;
+  public static final long DEFAULT_RESTART_RS_HOLDING_META_SLEEP_TIME = 35000;
+  public static final float DEFAULT_COMPACT_TABLE_ACTION_RATIO = 0.5f;
+  public static final float DEFAULT_COMPACT_RANDOM_REGION_RATIO = 0.6f;
+  public static final long DEFAULT_UNBALANCE_CHAOS_EVERY_MS = 65 * 1000;
+  public static final long DEFAULT_UNBALANCE_WAIT_FOR_UNBALANCE_MS = 2 * 1000;
+  public static final long DEFAULT_UNBALANCE_WAIT_FOR_KILLS_MS = 2 * 1000;
+  public static final long DEFAULT_UNBALANCE_WAIT_AFTER_BALANCE_MS = 5 * 1000;
+
+}
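
Since these knobs are read through java.util.Properties, they can live in an ordinary
properties file. A hypothetical monkey.properties using only keys defined above; the
values are illustrative (milliseconds for periods and sleep times, plain floats for
the ratios), and any key omitted keeps its DEFAULT_* value:

    # Hypothetical tuning file for the property-driven monkeys.
    sdm.action1.period=30000
    sdm.action3.period=120000
    move.regions.sleep.time=500
    batch.restart.rs.ratio=0.3
    unbalance.chaos.period=90000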

http://git-wip-us.apache.org/repos/asf/hbase/blob/30d4d5de/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/MonkeyFactory.java
----------------------------------------------------------------------
diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/MonkeyFactory.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/MonkeyFactory.java
index 0ae5d1d..944fe14 100644
--- a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/MonkeyFactory.java
+++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/MonkeyFactory.java
@@ -19,6 +19,7 @@
 package org.apache.hadoop.hbase.chaos.factories;
 
 import java.util.Map;
+import java.util.Properties;
 import java.util.Set;
 
 import org.apache.hadoop.hbase.IntegrationTestingUtility;
@@ -34,6 +35,7 @@ public abstract class MonkeyFactory {
   protected String tableName;
   protected Set<String> columnFamilies;
   protected IntegrationTestingUtility util;
+  protected Properties properties = new Properties();
 
   public MonkeyFactory setTableName(String tableName) {
     this.tableName = tableName;
@@ -50,6 +52,13 @@ public abstract class MonkeyFactory {
     return this;
   }
 
+  public MonkeyFactory setProperties(Properties props) {
+    if (props != null) {
+      this.properties = props;
+    }
+    return this;
+  }
+
   public abstract ChaosMonkey build();
 
   public static final String CALM = "calm";

http://git-wip-us.apache.org/repos/asf/hbase/blob/30d4d5de/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/NoKillMonkeyFactory.java
----------------------------------------------------------------------
diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/NoKillMonkeyFactory.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/NoKillMonkeyFactory.java
index 5b4035e..c41a7a8 100644
--- a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/NoKillMonkeyFactory.java
+++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/NoKillMonkeyFactory.java
@@ -67,7 +67,7 @@ public class NoKillMonkeyFactory extends MonkeyFactory {
     };
 
     Action[] actions3 = new Action[] {
-        new MoveRegionsOfTableAction(800,tableName),
+        new MoveRegionsOfTableAction(800,1600,tableName),
         new MoveRandomRegionOfTableAction(800,tableName),
     };
 

http://git-wip-us.apache.org/repos/asf/hbase/blob/30d4d5de/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/SlowDeterministicMonkeyFactory.java
----------------------------------------------------------------------
diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/SlowDeterministicMonkeyFactory.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/SlowDeterministicMonkeyFactory.java
index 2b7ea20..70d64b9 100644
--- a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/SlowDeterministicMonkeyFactory.java
+++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/SlowDeterministicMonkeyFactory.java
@@ -45,17 +45,37 @@ import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey;
 import org.apache.hadoop.hbase.chaos.policies.CompositeSequentialPolicy;
 import org.apache.hadoop.hbase.chaos.policies.DoActionsOncePolicy;
 import org.apache.hadoop.hbase.chaos.policies.PeriodicRandomActionPolicy;
+import org.junit.Assert;
 
 public class SlowDeterministicMonkeyFactory extends MonkeyFactory {
+
+  private long action1Period;
+  private long action2Period;
+  private long action3Period;
+  private long action4Period;
+  private long moveRegionsMaxTime;
+  private long moveRegionsSleepTime;
+  private long moveRandomRegionSleepTime;
+  private long restartRandomRSSleepTime;
+  private long batchRestartRSSleepTime;
+  private float batchRestartRSRatio;
+  private long restartActiveMasterSleepTime;
+  private long rollingBatchRestartRSSleepTime;
+  private float rollingBatchRestartRSRatio;
+  private long restartRsHoldingMetaSleepTime;
+  private float compactTableRatio;
+  private float compactRandomRegionRatio;
+
   @Override
   public ChaosMonkey build() {
 
+    loadProperties();
     // Actions such as compact/flush a table/region,
     // move one region around. They are not so destructive,
     // can be executed more frequently.
     Action[] actions1 = new Action[] {
-        new CompactTableAction(tableName, 0.5f),
-        new CompactRandomRegionOfTableAction(tableName, 0.6f),
+        new CompactTableAction(tableName, compactTableRatio),
+        new CompactRandomRegionOfTableAction(tableName, compactRandomRegionRatio),
         new FlushTableAction(tableName),
         new FlushRandomRegionOfTableAction(tableName),
         new MoveRandomRegionOfTableAction(tableName)
@@ -78,13 +98,15 @@ public class SlowDeterministicMonkeyFactory extends MonkeyFactory {
 
     // Destructive actions to mess things around.
     Action[] actions3 = new Action[] {
-        new MoveRegionsOfTableAction(800, tableName),
-        new MoveRandomRegionOfTableAction(800, tableName),
-        new RestartRandomRsAction(60000),
-        new BatchRestartRsAction(5000, 0.5f),
-        new RestartActiveMasterAction(5000),
-        new RollingBatchRestartRsAction(5000, 1.0f),
-        new RestartRsHoldingMetaAction(35000)
+        new MoveRegionsOfTableAction(moveRegionsSleepTime, moveRegionsMaxTime,
+            tableName),
+        new MoveRandomRegionOfTableAction(moveRandomRegionSleepTime, tableName),
+        new RestartRandomRsAction(restartRandomRSSleepTime),
+        new BatchRestartRsAction(batchRestartRSSleepTime, batchRestartRSRatio),
+        new RestartActiveMasterAction(restartActiveMasterSleepTime),
+        new RollingBatchRestartRsAction(rollingBatchRestartRSSleepTime,
+            rollingBatchRestartRSRatio),
+        new RestartRsHoldingMetaAction(restartRsHoldingMetaSleepTime)
     };
 
     // Action to log more info for debugging
@@ -93,11 +115,60 @@ public class SlowDeterministicMonkeyFactory extends MonkeyFactory {
     };
 
     return new PolicyBasedChaosMonkey(util,
-        new PeriodicRandomActionPolicy(60 * 1000, actions1),
-        new PeriodicRandomActionPolicy(90 * 1000, actions2),
+        new PeriodicRandomActionPolicy(action1Period, actions1),
+        new PeriodicRandomActionPolicy(action2Period, actions2),
         new CompositeSequentialPolicy(
-            new DoActionsOncePolicy(150 * 1000, actions3),
-            new PeriodicRandomActionPolicy(150 * 1000, actions3)),
-        new PeriodicRandomActionPolicy(90 * 1000, actions4));
+            new DoActionsOncePolicy(action3Period, actions3),
+            new PeriodicRandomActionPolicy(action3Period, actions3)),
+        new PeriodicRandomActionPolicy(action4Period, actions4));
+  }
+
+  private void loadProperties() {
+
+      action1Period = Long.parseLong(this.properties.getProperty(
+        MonkeyConstants.PERIODIC_ACTION1_PERIOD,
+        MonkeyConstants.DEFAULT_PERIODIC_ACTION1_PERIOD + ""));
+      action2Period = Long.parseLong(this.properties.getProperty(
+        MonkeyConstants.PERIODIC_ACTION2_PERIOD,
+        MonkeyConstants.DEFAULT_PERIODIC_ACTION2_PERIOD + ""));
+      action3Period = Long.parseLong(this.properties.getProperty(
+        MonkeyConstants.COMPOSITE_ACTION3_PERIOD,
+        MonkeyConstants.DEFAULT_COMPOSITE_ACTION3_PERIOD + ""));
+      action4Period = Long.parseLong(this.properties.getProperty(
+        MonkeyConstants.PERIODIC_ACTION4_PERIOD,
+        MonkeyConstants.DEFAULT_PERIODIC_ACTION4_PERIOD + ""));
+      moveRegionsMaxTime = Long.parseLong(this.properties.getProperty(
+        MonkeyConstants.MOVE_REGIONS_MAX_TIME,
+        MonkeyConstants.DEFAULT_MOVE_REGIONS_MAX_TIME + ""));
+      moveRegionsSleepTime = Long.parseLong(this.properties.getProperty(
+        MonkeyConstants.MOVE_REGIONS_SLEEP_TIME,
+        MonkeyConstants.DEFAULT_MOVE_REGIONS_SLEEP_TIME + ""));
+      moveRandomRegionSleepTime = Long.parseLong(this.properties.getProperty(
+        MonkeyConstants.MOVE_RANDOM_REGION_SLEEP_TIME,
+        MonkeyConstants.DEFAULT_MOVE_RANDOM_REGION_SLEEP_TIME + ""));
+      restartRandomRSSleepTime = Long.parseLong(this.properties.getProperty(
+        MonkeyConstants.RESTART_RANDOM_RS_SLEEP_TIME,
+        MonkeyConstants.DEFAULT_RESTART_RANDOM_RS_SLEEP_TIME + ""));
+      batchRestartRSSleepTime = Long.parseLong(this.properties.getProperty(
+        MonkeyConstants.BATCH_RESTART_RS_SLEEP_TIME,
+        MonkeyConstants.DEFAULT_BATCH_RESTART_RS_SLEEP_TIME + ""));
+      batchRestartRSRatio = Float.parseFloat(this.properties.getProperty(
+        MonkeyConstants.BATCH_RESTART_RS_RATIO,
+        MonkeyConstants.DEFAULT_BATCH_RESTART_RS_RATIO + ""));
+      restartActiveMasterSleepTime = Long.parseLong(this.properties.getProperty(
+        MonkeyConstants.RESTART_ACTIVE_MASTER_SLEEP_TIME,
+        MonkeyConstants.DEFAULT_RESTART_ACTIVE_MASTER_SLEEP_TIME + ""));
+      rollingBatchRestartRSSleepTime = Long.parseLong(this.properties.getProperty(
+        MonkeyConstants.ROLLING_BATCH_RESTART_RS_SLEEP_TIME,
+        MonkeyConstants.DEFAULT_ROLLING_BATCH_RESTART_RS_SLEEP_TIME + ""));
+      rollingBatchRestartRSRatio = Float.parseFloat(this.properties.getProperty(
+        MonkeyConstants.ROLLING_BATCH_RESTART_RS_RATIO,
+        MonkeyConstants.DEFAULT_ROLLING_BATCH_RESTART_RS_RATIO + ""));
+      restartRsHoldingMetaSleepTime = Long.parseLong(this.properties.getProperty(
+        MonkeyConstants.RESTART_RS_HOLDING_META_SLEEP_TIME,
+        MonkeyConstants.DEFAULT_RESTART_RS_HOLDING_META_SLEEP_TIME + ""));
+      compactTableRatio = Float.parseFloat(this.properties.getProperty(
+        MonkeyConstants.COMPACT_TABLE_ACTION_RATIO,
+        MonkeyConstants.DEFAULT_COMPACT_TABLE_ACTION_RATIO + ""));
+      compactRandomRegionRatio = Float.parseFloat(this.properties.getProperty(
+        MonkeyConstants.COMPACT_RANDOM_REGION_RATIO,
+        MonkeyConstants.DEFAULT_COMPACT_RANDOM_REGION_RATIO + ""));
   }
 }
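
With loadProperties() in place, these knobs can be supplied at launch time instead of
by recompiling. A hedged invocation sketch, assuming the monkeyProps option that this
series wires through IntegrationTestBase and a local file named monkey.properties;
IntegrationTestIngest stands in for any IntegrationTestBase subclass:

    hbase org.apache.hadoop.hbase.IntegrationTestIngest \
        -m slowDeterministic -monkeyProps monkey.properties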

http://git-wip-us.apache.org/repos/asf/hbase/blob/30d4d5de/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/UnbalanceMonkeyFactory.java
----------------------------------------------------------------------
diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/UnbalanceMonkeyFactory.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/UnbalanceMonkeyFactory.java
index 2d2d315..f4ea435 100644
--- a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/UnbalanceMonkeyFactory.java
+++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/UnbalanceMonkeyFactory.java
@@ -27,15 +27,33 @@ import org.apache.hadoop.hbase.chaos.policies.Policy;
 public class UnbalanceMonkeyFactory extends MonkeyFactory {
   /** How often to introduce the chaos. If too frequent, a sequence of kills on the
    * minicluster can cause the test to fail when a Put runs out of retries. */
-  private static final long CHAOS_EVERY_MS = 65 * 1000;
+  private long chaosEveryMilliSec;
+  private long waitForUnbalanceMilliSec;
+  private long waitForKillMilliSec;
+  private long waitAfterBalanceMilliSec;
 
   @Override
   public ChaosMonkey build() {
-    Policy chaosPolicy = new PeriodicRandomActionPolicy(
-        CHAOS_EVERY_MS,
-        new UnbalanceKillAndRebalanceAction()
-    );
+    loadProperties();
+    Policy chaosPolicy = new PeriodicRandomActionPolicy(chaosEveryMilliSec,
+        new UnbalanceKillAndRebalanceAction(waitForUnbalanceMilliSec, waitForKillMilliSec,
+            waitAfterBalanceMilliSec));
 
     return new PolicyBasedChaosMonkey(util, chaosPolicy);
   }
+
+  private void loadProperties() {
+    chaosEveryMilliSec = Long.parseLong(this.properties.getProperty(
+      MonkeyConstants.UNBALANCE_CHAOS_EVERY_MS,
+      MonkeyConstants.DEFAULT_UNBALANCE_CHAOS_EVERY_MS + ""));
+    waitForUnbalanceMilliSec = Long.parseLong(this.properties.getProperty(
+      MonkeyConstants.UNBALANCE_WAIT_FOR_UNBALANCE_MS,
+      MonkeyConstants.DEFAULT_UNBALANCE_WAIT_FOR_UNBALANCE_MS + ""));
+    waitForKillMilliSec = Long.parseLong(this.properties.getProperty(
+      MonkeyConstants.UNBALANCE_WAIT_FOR_KILLS_MS,
+      MonkeyConstants.DEFAULT_UNBALANCE_WAIT_FOR_KILLS_MS + ""));
+    waitAfterBalanceMilliSec = Long.parseLong(this.properties.getProperty(
+      MonkeyConstants.UNBALANCE_WAIT_AFTER_BALANCE_MS,
+      MonkeyConstants.DEFAULT_UNBALANCE_WAIT_AFTER_BALANCE_MS + ""));
+  }
 }

http://git-wip-us.apache.org/repos/asf/hbase/blob/30d4d5de/hbase-it/src/test/java/org/apache/hadoop/hbase/mttr/IntegrationTestMTTR.java
----------------------------------------------------------------------
diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/mttr/IntegrationTestMTTR.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/mttr/IntegrationTestMTTR.java
index 5d2f6ea..9571314 100644
--- a/hbase-it/src/test/java/org/apache/hadoop/hbase/mttr/IntegrationTestMTTR.java
+++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/mttr/IntegrationTestMTTR.java
@@ -48,6 +48,7 @@ import org.apache.hadoop.hbase.chaos.actions.MoveRegionsOfTableAction;
 import org.apache.hadoop.hbase.chaos.actions.RestartActiveMasterAction;
 import org.apache.hadoop.hbase.chaos.actions.RestartRsHoldingMetaAction;
 import org.apache.hadoop.hbase.chaos.actions.RestartRsHoldingTableAction;
+import org.apache.hadoop.hbase.chaos.factories.MonkeyConstants;
 import org.apache.hadoop.hbase.client.HBaseAdmin;
 import org.apache.hadoop.hbase.client.HTable;
 import org.apache.hadoop.hbase.client.Put;
@@ -188,7 +189,8 @@ public class IntegrationTestMTTR {
     restartMetaAction = new RestartRsHoldingMetaAction(sleepTime);
 
     // Set up the action that will move the regions of our table.
-    moveRegionAction = new MoveRegionsOfTableAction(sleepTime, tableName.getNameAsString());
+    moveRegionAction = new MoveRegionsOfTableAction(sleepTime,
+        MonkeyConstants.DEFAULT_MOVE_REGIONS_MAX_TIME, tableName.getNameAsString());
 
     // Kill the master
     restartMasterAction = new RestartActiveMasterAction(1000);


[4/5] git commit: Amend HBASE-12314 Add chaos monkey policy to execute two actions concurrently

Posted by ap...@apache.org.
Amend HBASE-12314 Add chaos monkey policy to execute two actions concurrently

Port of the HBASE-12314 changes as made in branch-1 and later
branches, with HBASE-11348 already in place

Amending-Author: Andrew Purtell <ap...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/941b4a7c
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/941b4a7c
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/941b4a7c

Branch: refs/heads/0.98
Commit: 941b4a7c46c6dd88e78f07d25605a725bf5e68b5
Parents: 5c511f3
Author: Elliott Clark <ec...@apache.org>
Authored: Wed Oct 29 20:56:58 2014 -0700
Committer: Andrew Purtell <ap...@apache.org>
Committed: Wed Oct 29 20:56:58 2014 -0700

----------------------------------------------------------------------
 .../chaos/factories/NoKillMonkeyFactory.java      | 18 +++++++++++-------
 .../factories/SlowDeterministicMonkeyFactory.java |  1 -
 2 files changed, 11 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/941b4a7c/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/NoKillMonkeyFactory.java
----------------------------------------------------------------------
diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/NoKillMonkeyFactory.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/NoKillMonkeyFactory.java
index c41a7a8..def9df0 100644
--- a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/NoKillMonkeyFactory.java
+++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/NoKillMonkeyFactory.java
@@ -47,8 +47,9 @@ import org.apache.hadoop.hbase.chaos.policies.TwoConcurrentActionPolicy;
 public class NoKillMonkeyFactory extends MonkeyFactory {
   @Override public ChaosMonkey build() {
     Action[] actions1 = new Action[] {
-        new CompactTableAction(tableName, 60*1000),
-        new CompactRandomRegionOfTableAction(tableName,0.6f),
+        new CompactTableAction(tableName, MonkeyConstants.DEFAULT_PERIODIC_ACTION1_PERIOD),
+        new CompactRandomRegionOfTableAction(tableName,
+            MonkeyConstants.DEFAULT_COMPACT_RANDOM_REGION_RATIO),
         new FlushTableAction(tableName),
         new FlushRandomRegionOfTableAction(tableName),
         new MoveRandomRegionOfTableAction(tableName)
@@ -67,8 +68,11 @@ public class NoKillMonkeyFactory extends MonkeyFactory {
     };
 
     Action[] actions3 = new Action[] {
-        new MoveRegionsOfTableAction(800,1600,tableName),
-        new MoveRandomRegionOfTableAction(800,tableName),
+        new MoveRegionsOfTableAction(MonkeyConstants.DEFAULT_MOVE_REGIONS_SLEEP_TIME,
+            MonkeyConstants.DEFAULT_MOVE_REGIONS_MAX_TIME,
+            tableName),
+        new MoveRandomRegionOfTableAction(MonkeyConstants.DEFAULT_RESTART_ACTIVE_MASTER_SLEEP_TIME,
+            tableName),
     };
 
     Action[] actions4 = new Action[] {
@@ -76,8 +80,8 @@ public class NoKillMonkeyFactory extends MonkeyFactory {
     };
 
     return new PolicyBasedChaosMonkey(util,
-        new TwoConcurrentActionPolicy(60*1000, actions1, actions2),
-        new PeriodicRandomActionPolicy(90*1000,actions3),
-        new PeriodicRandomActionPolicy(90*1000,actions4));
+        new TwoConcurrentActionPolicy(MonkeyConstants.DEFAULT_PERIODIC_ACTION1_PERIOD, actions1, actions2),
+        new PeriodicRandomActionPolicy(MonkeyConstants.DEFAULT_PERIODIC_ACTION2_PERIOD,actions3),
+        new PeriodicRandomActionPolicy(MonkeyConstants.DEFAULT_PERIODIC_ACTION4_PERIOD,actions4));
   }
 }

http://git-wip-us.apache.org/repos/asf/hbase/blob/941b4a7c/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/SlowDeterministicMonkeyFactory.java
----------------------------------------------------------------------
diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/SlowDeterministicMonkeyFactory.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/SlowDeterministicMonkeyFactory.java
index 70d64b9..6195737 100644
--- a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/SlowDeterministicMonkeyFactory.java
+++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/SlowDeterministicMonkeyFactory.java
@@ -45,7 +45,6 @@ import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey;
 import org.apache.hadoop.hbase.chaos.policies.CompositeSequentialPolicy;
 import org.apache.hadoop.hbase.chaos.policies.DoActionsOncePolicy;
 import org.apache.hadoop.hbase.chaos.policies.PeriodicRandomActionPolicy;
-import org.junit.Assert;
 
 public class SlowDeterministicMonkeyFactory extends MonkeyFactory {
 


[5/5] git commit: HBASE-12322 Add Clean command to ITBLL

Posted by ap...@apache.org.
HBASE-12322 Add Clean command to ITBLL

Conflicts:
	hbase-it/src/test/java/org/apache/hadoop/hbase/test/IntegrationTestBigLinkedList.java

Signed-off-by: stack <st...@apache.org>
Amending-Author: Andrew Purtell <ap...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/ea479051
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/ea479051
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/ea479051

Branch: refs/heads/0.98
Commit: ea479051f177bef91c508305c800b351b9a9abf7
Parents: 941b4a7
Author: Elliott Clark <ec...@apache.org>
Authored: Wed Oct 29 20:56:58 2014 -0700
Committer: Andrew Purtell <ap...@apache.org>
Committed: Wed Oct 29 20:56:58 2014 -0700

----------------------------------------------------------------------
 .../test/IntegrationTestBigLinkedList.java      | 36 ++++++++++++++++++++
 1 file changed, 36 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/ea479051/hbase-it/src/test/java/org/apache/hadoop/hbase/test/IntegrationTestBigLinkedList.java
----------------------------------------------------------------------
diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/test/IntegrationTestBigLinkedList.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/test/IntegrationTestBigLinkedList.java
index 5abcaf6..0fd546b 100644
--- a/hbase-it/src/test/java/org/apache/hadoop/hbase/test/IntegrationTestBigLinkedList.java
+++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/test/IntegrationTestBigLinkedList.java
@@ -39,6 +39,7 @@ import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.HBaseConfiguration;
 import org.apache.hadoop.hbase.HBaseTestingUtility;
@@ -48,6 +49,7 @@ import org.apache.hadoop.hbase.HTableDescriptor;
 import org.apache.hadoop.hbase.IntegrationTestBase;
 import org.apache.hadoop.hbase.IntegrationTestingUtility;
 import org.apache.hadoop.hbase.IntegrationTests;
+import org.apache.hadoop.hbase.fs.HFileSystem;
 import org.apache.hadoop.hbase.MasterNotRunningException;
 import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.client.Get;
@@ -1035,6 +1037,37 @@ public class IntegrationTestBigLinkedList extends IntegrationTestBase {
     }
   }
 
+  private static class Clean extends Configured implements Tool {
+
+    @Override public int run(String[] args) throws Exception {
+      if (args.length < 1) {
+        System.err.println("Usage: Clean <output dir>");
+        return -1;
+      }
+
+      Path p = new Path(args[0]);
+      Configuration conf = getConf();
+      TableName tableName = getTableName(conf);
+
+      FileSystem fs = HFileSystem.get(conf);
+      HBaseAdmin admin = new HBaseAdmin(conf);
+      try {
+        if (admin.tableExists(tableName)) {
+          admin.disableTable(tableName);
+          admin.deleteTable(tableName);
+        }
+      } finally {
+        admin.close();
+      }
+
+      if (fs.exists(p)) {
+        fs.delete(p, true);
+      }
+
+      return 0;
+    }
+  }
+
   static TableName getTableName(Configuration conf) {
     return TableName.valueOf(conf.get(TABLE_NAME_KEY, DEFAULT_TABLE_NAME));
   }
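
For reference, the new command is launched the same way as the existing ITBLL
commands; a sketch, where the output directory is hypothetical and should match the
one handed to Generator or Loop:

    hbase org.apache.hadoop.hbase.test.IntegrationTestBigLinkedList Clean /tmp/ITBLL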
@@ -1108,6 +1141,7 @@ public class IntegrationTestBigLinkedList extends IntegrationTestBase {
     System.err.println("                             single node.");
     System.err.println("  Loop                       A program to Loop through Generator and");
     System.err.println("                             Verify steps");
+    System.err.println("  Clean                      A program to clean all left over detritus.");
     System.err.println("\t  ");
     System.err.flush();
   }
@@ -1143,6 +1177,8 @@ public class IntegrationTestBigLinkedList extends IntegrationTestBase {
       tool = new Print();
     } else if (toRun.equals("Delete")) {
       tool = new Delete();
+    } else if (toRun.equals("Clean")) {
+      tool = new Clean();
     } else {
       usage();
       throw new RuntimeException("Unknown arg");