You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ma...@apache.org on 2018/11/30 18:10:36 UTC

[09/17] lucene-solr:branch_7x: SOLR-12801: Make massive improvements to the tests.

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/eb652b84/solr/core/src/test/org/apache/solr/cloud/TestAuthenticationFramework.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/TestAuthenticationFramework.java b/solr/core/src/test/org/apache/solr/cloud/TestAuthenticationFramework.java
index 0615289..945c901 100644
--- a/solr/core/src/test/org/apache/solr/cloud/TestAuthenticationFramework.java
+++ b/solr/core/src/test/org/apache/solr/cloud/TestAuthenticationFramework.java
@@ -70,7 +70,6 @@ public class TestAuthenticationFramework extends SolrCloudTestCase {
   }
   
   @Test
-  @BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028")
   public void testBasics() throws Exception {
     collectionCreateSearchDeleteTwice();
 
@@ -94,6 +93,7 @@ public class TestAuthenticationFramework extends SolrCloudTestCase {
   @Override
   public void tearDown() throws Exception {
     System.clearProperty("authenticationPlugin");
+    shutdownCluster();
     super.tearDown();
   }
 
@@ -103,14 +103,15 @@ public class TestAuthenticationFramework extends SolrCloudTestCase {
       CollectionAdminRequest.createCollection(collectionName, configName, numShards, numReplicas)
           .setMaxShardsPerNode(maxShardsPerNode)
           .processAndWait(cluster.getSolrClient(), 90);
+      cluster.waitForActiveCollection(collectionName, numShards, numShards * numReplicas);
     }
     else {
       CollectionAdminRequest.createCollection(collectionName, configName, numShards, numReplicas)
           .setMaxShardsPerNode(maxShardsPerNode)
           .process(cluster.getSolrClient());
+      cluster.waitForActiveCollection(collectionName, numShards, numShards * numReplicas);
     }
-    AbstractDistribZkTestBase.waitForRecoveriesToFinish
-        (collectionName, cluster.getSolrClient().getZkStateReader(), true, true, 330);
+
   }
 
   public void collectionCreateSearchDeleteTwice() throws Exception {
@@ -124,14 +125,13 @@ public class TestAuthenticationFramework extends SolrCloudTestCase {
       assertEquals(0, client.query(collectionName, new SolrQuery("*:*")).getResults().getNumFound());
 
       // modify/query collection
+      Thread.sleep(100); // not everyone is up to date just because we waited to make sure one was - pause a moment
       new UpdateRequest().add("id", "1").commit(client, collectionName);
       QueryResponse rsp = client.query(collectionName, new SolrQuery("*:*"));
       assertEquals(1, rsp.getResults().getNumFound());
 
       // delete the collection
-      CollectionAdminRequest.deleteCollection(collectionName).process(client);
-      AbstractDistribZkTestBase.waitForCollectionToDisappear
-          (collectionName, client.getZkStateReader(), true, true, 330);
+     cluster.deleteAllCollections();
     }
   }
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/eb652b84/solr/core/src/test/org/apache/solr/cloud/TestCloudConsistency.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/TestCloudConsistency.java b/solr/core/src/test/org/apache/solr/cloud/TestCloudConsistency.java
index dac1c91..db558c5b 100644
--- a/solr/core/src/test/org/apache/solr/cloud/TestCloudConsistency.java
+++ b/solr/core/src/test/org/apache/solr/cloud/TestCloudConsistency.java
@@ -29,6 +29,7 @@ import java.util.concurrent.TimeUnit;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.solr.JSONTestUtil;
 import org.apache.solr.client.solrj.SolrServerException;
+import org.apache.solr.client.solrj.cloud.SocketProxy;
 import org.apache.solr.client.solrj.embedded.JettySolrRunner;
 import org.apache.solr.client.solrj.impl.HttpSolrClient;
 import org.apache.solr.client.solrj.request.CollectionAdminRequest;
@@ -39,8 +40,8 @@ import org.apache.solr.common.cloud.ZkCoreNodeProps;
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.common.util.TimeSource;
 import org.apache.solr.util.TimeOut;
-import org.junit.AfterClass;
-import org.junit.BeforeClass;
+import org.junit.After;
+import org.junit.Before;
 import org.junit.Test;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -52,8 +53,8 @@ public class TestCloudConsistency extends SolrCloudTestCase {
   private static Map<JettySolrRunner, SocketProxy> proxies;
   private static Map<URI, JettySolrRunner> jettys;
 
-  @BeforeClass
-  public static void setupCluster() throws Exception {
+  @Before
+  public void setupCluster() throws Exception {
     System.setProperty("solr.directoryFactory", "solr.StandardDirectoryFactory");
     System.setProperty("solr.ulog.numRecordsToKeep", "1000");
     System.setProperty("leaderVoteWait", "60000");
@@ -76,8 +77,8 @@ public class TestCloudConsistency extends SolrCloudTestCase {
     }
   }
 
-  @AfterClass
-  public static void tearDownCluster() throws Exception {
+  @After
+  public void tearDownCluster() throws Exception {
     for (SocketProxy proxy:proxies.values()) {
       proxy.close();
     }
@@ -86,6 +87,8 @@ public class TestCloudConsistency extends SolrCloudTestCase {
     System.clearProperty("solr.directoryFactory");
     System.clearProperty("solr.ulog.numRecordsToKeep");
     System.clearProperty("leaderVoteWait");
+    
+    shutdownCluster();
   }
 
   @Test
@@ -117,6 +120,9 @@ public class TestCloudConsistency extends SolrCloudTestCase {
     CollectionAdminRequest.addReplicaToShard(collectionName, "shard1")
         .setNode(cluster.getJettySolrRunner(2).getNodeName())
         .process(cluster.getSolrClient());
+    
+    cluster.waitForActiveCollection(collectionName, 1, 3);
+    
     waitForState("Timeout waiting for 1x3 collection", collectionName, clusterShape(1, 3));
 
     addDocs(collectionName, 3, 1);
@@ -142,18 +148,29 @@ public class TestCloudConsistency extends SolrCloudTestCase {
    * Leader should be on node - 0
    */
   private void addDocToWhenOtherReplicasAreDown(String collection, Replica leader, int docId) throws Exception {
-    ChaosMonkey.stop(cluster.getJettySolrRunner(1));
-    ChaosMonkey.stop(cluster.getJettySolrRunner(2));
+    JettySolrRunner j1 = cluster.getJettySolrRunner(1);
+    JettySolrRunner j2 = cluster.getJettySolrRunner(2);
+    j1.stop();
+    j2.stop();
+    cluster.waitForJettyToStop(j1);
+    cluster.waitForJettyToStop(j2);
+    
     waitForState("", collection, (liveNodes, collectionState) ->
       collectionState.getSlice("shard1").getReplicas().stream()
           .filter(replica -> replica.getState() == Replica.State.DOWN).count() == 2);
 
     addDocs(collection, 1, docId);
-    ChaosMonkey.stop(cluster.getJettySolrRunner(0));
+    JettySolrRunner j3 = cluster.getJettySolrRunner(0);
+    j3.stop();
+    cluster.waitForJettyToStop(j3);
     waitForState("", collection, (liveNodes, collectionState) -> collectionState.getReplica(leader.getName()).getState() == Replica.State.DOWN);
 
-    ChaosMonkey.start(cluster.getJettySolrRunner(1));
-    ChaosMonkey.start(cluster.getJettySolrRunner(2));
+    cluster.getJettySolrRunner(1).start();
+    cluster.getJettySolrRunner(2).start();
+    
+    cluster.waitForNode(j1, 30);
+    cluster.waitForNode(j2, 30);
+    
     TimeOut timeOut = new TimeOut(10, TimeUnit.SECONDS, TimeSource.CURRENT_TIME);
     while (!timeOut.hasTimedOut()) {
       Replica newLeader = getCollectionState(collection).getSlice("shard1").getLeader();
@@ -162,7 +179,13 @@ public class TestCloudConsistency extends SolrCloudTestCase {
       }
     }
 
-    ChaosMonkey.start(cluster.getJettySolrRunner(0));
+    JettySolrRunner j0 = cluster.getJettySolrRunner(0);
+    j0.start();
+    cluster.waitForNode(j0, 30);
+    
+    // waitForNode not solid yet?
+    cluster.waitForAllNodes(30);
+    
     waitForState("Timeout waiting for leader", collection, (liveNodes, collectionState) -> {
       Replica newLeader = collectionState.getLeader("shard1");
       return newLeader != null && newLeader.getName().equals(leader.getName());
@@ -181,7 +204,9 @@ public class TestCloudConsistency extends SolrCloudTestCase {
       proxies.get(cluster.getJettySolrRunner(i)).close();
     }
     addDoc(collection, docId, cluster.getJettySolrRunner(0));
-    ChaosMonkey.stop(cluster.getJettySolrRunner(0));
+    JettySolrRunner j1 = cluster.getJettySolrRunner(0);
+    j1.stop();
+    cluster.waitForJettyToStop(j1);
     for (int i = 1; i < 3; i++) {
       proxies.get(cluster.getJettySolrRunner(i)).reopen();
     }
@@ -197,7 +222,8 @@ public class TestCloudConsistency extends SolrCloudTestCase {
     }
 
     proxies.get(cluster.getJettySolrRunner(0)).reopen();
-    ChaosMonkey.start(cluster.getJettySolrRunner(0));
+    cluster.getJettySolrRunner(0).start();
+    cluster.waitForAllNodes(30);;
     waitForState("Timeout waiting for leader", collection, (liveNodes, collectionState) -> {
       Replica newLeader = collectionState.getLeader("shard1");
       return newLeader != null && newLeader.getName().equals(leader.getName());

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/eb652b84/solr/core/src/test/org/apache/solr/cloud/TestCloudDeleteByQuery.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/TestCloudDeleteByQuery.java b/solr/core/src/test/org/apache/solr/cloud/TestCloudDeleteByQuery.java
index d85b139..11b71ad 100644
--- a/solr/core/src/test/org/apache/solr/cloud/TestCloudDeleteByQuery.java
+++ b/solr/core/src/test/org/apache/solr/cloud/TestCloudDeleteByQuery.java
@@ -109,13 +109,12 @@ public class TestCloudDeleteByQuery extends SolrCloudTestCase {
     CollectionAdminRequest.createCollection(COLLECTION_NAME, configName, NUM_SHARDS, REPLICATION_FACTOR)
         .setProperties(collectionProperties)
         .process(cluster.getSolrClient());
+    cluster.waitForActiveCollection(COLLECTION_NAME, NUM_SHARDS, REPLICATION_FACTOR * NUM_SHARDS);
 
     CLOUD_CLIENT = cluster.getSolrClient();
     CLOUD_CLIENT.setDefaultCollection(COLLECTION_NAME);
     
     ZkStateReader zkStateReader = CLOUD_CLIENT.getZkStateReader();
-    AbstractDistribZkTestBase.waitForRecoveriesToFinish(COLLECTION_NAME, zkStateReader, true, true, 330);
-
 
     // really hackish way to get a URL for specific nodes based on shard/replica hosting
     // inspired by TestMiniSolrCloudCluster

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/eb652b84/solr/core/src/test/org/apache/solr/cloud/TestCloudRecovery.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/TestCloudRecovery.java b/solr/core/src/test/org/apache/solr/cloud/TestCloudRecovery.java
index eb8a92e..8512bcb 100644
--- a/solr/core/src/test/org/apache/solr/cloud/TestCloudRecovery.java
+++ b/solr/core/src/test/org/apache/solr/cloud/TestCloudRecovery.java
@@ -20,7 +20,6 @@ package org.apache.solr.cloud;
 import java.io.File;
 import java.io.FileInputStream;
 import java.io.FileOutputStream;
-import java.io.IOException;
 import java.util.Arrays;
 import java.util.HashMap;
 import java.util.List;
@@ -28,11 +27,7 @@ import java.util.Map;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.stream.Collectors;
 
-import com.codahale.metrics.Counter;
-import com.codahale.metrics.Metric;
-import com.codahale.metrics.Timer;
 import org.apache.commons.io.IOUtils;
-import org.apache.solr.client.solrj.SolrServerException;
 import org.apache.solr.client.solrj.embedded.JettySolrRunner;
 import org.apache.solr.client.solrj.impl.CloudSolrClient;
 import org.apache.solr.client.solrj.request.CollectionAdminRequest;
@@ -44,33 +39,46 @@ import org.apache.solr.metrics.SolrMetricManager;
 import org.apache.solr.update.DirectUpdateHandler2;
 import org.apache.solr.update.UpdateLog;
 import org.apache.solr.update.UpdateShardHandler;
+import org.junit.After;
 import org.junit.Before;
 import org.junit.BeforeClass;
 import org.junit.Test;
 
+import com.codahale.metrics.Counter;
+import com.codahale.metrics.Metric;
+import com.codahale.metrics.Timer;
+
 public class TestCloudRecovery extends SolrCloudTestCase {
 
   private static final String COLLECTION = "collection1";
   private static boolean onlyLeaderIndexes;
+  
+  private int nrtReplicas;
+  private int tlogReplicas;
 
   @BeforeClass
   public static void setupCluster() throws Exception {
     System.setProperty("solr.directoryFactory", "solr.StandardDirectoryFactory");
     System.setProperty("solr.ulog.numRecordsToKeep", "1000");
+  }
 
+  @Before
+  public void beforeTest() throws Exception {
     configureCluster(2)
         .addConfig("config", TEST_PATH().resolve("configsets").resolve("cloud-minimal").resolve("conf"))
         .configure();
 
     onlyLeaderIndexes = random().nextBoolean();
+    nrtReplicas = 2; // onlyLeaderIndexes?0:2;
+    tlogReplicas = 0; // onlyLeaderIndexes?2:0; TODO: SOLR-12313 tlog replicas break tests because
+                          // TestInjection#waitForInSyncWithLeader is broken
     CollectionAdminRequest
-        .createCollection(COLLECTION, "config", 2, onlyLeaderIndexes?0:2,onlyLeaderIndexes?2:0,0)
+        .createCollection(COLLECTION, "config", 2, nrtReplicas, tlogReplicas, 0)
         .setMaxShardsPerNode(2)
         .process(cluster.getSolrClient());
-    AbstractDistribZkTestBase.waitForRecoveriesToFinish(COLLECTION, cluster.getSolrClient().getZkStateReader(),
-        false, true, 30);
+    cluster.waitForActiveCollection(COLLECTION, 2, 2 * (nrtReplicas + tlogReplicas));
 
-    //SOLR-12314 : assert that these values are from the solr.xml file and not UpdateShardHandlerConfig#DEFAULT
+    // SOLR-12314 : assert that these values are from the solr.xml file and not UpdateShardHandlerConfig#DEFAULT
     for (JettySolrRunner jettySolrRunner : cluster.getJettySolrRunners()) {
       UpdateShardHandler shardHandler = jettySolrRunner.getCoreContainer().getUpdateShardHandler();
       int socketTimeout = shardHandler.getSocketTimeout();
@@ -79,11 +87,10 @@ public class TestCloudRecovery extends SolrCloudTestCase {
       assertEquals(45000, connectionTimeout);
     }
   }
-
-  @Before
-  public void resetCollection() throws IOException, SolrServerException {
-    cluster.getSolrClient().deleteByQuery(COLLECTION, "*:*");
-    cluster.getSolrClient().commit(COLLECTION);
+  
+  @After
+  public void afterTest() throws Exception {
+    shutdownCluster();
   }
 
   @Test
@@ -105,8 +112,16 @@ public class TestCloudRecovery extends SolrCloudTestCase {
     assertEquals(0, resp.getResults().getNumFound());
 
     ChaosMonkey.stop(cluster.getJettySolrRunners());
+
+    
+    for (JettySolrRunner jettySolrRunner : cluster.getJettySolrRunners()) {
+      cluster.waitForJettyToStop(jettySolrRunner);
+    }
     assertTrue("Timeout waiting for all not live", ClusterStateUtil.waitForAllReplicasNotLive(cloudClient.getZkStateReader(), 45000));
     ChaosMonkey.start(cluster.getJettySolrRunners());
+    
+    cluster.waitForAllNodes(30);
+    
     assertTrue("Timeout waiting for all live and active", ClusterStateUtil.waitForAllActiveAndLiveReplicas(cloudClient.getZkStateReader(), COLLECTION, 120000));
 
     resp = cloudClient.query(COLLECTION, params);
@@ -180,6 +195,11 @@ public class TestCloudRecovery extends SolrCloudTestCase {
     }
 
     ChaosMonkey.stop(cluster.getJettySolrRunners());
+    
+    for (JettySolrRunner j : cluster.getJettySolrRunners()) {
+      cluster.waitForJettyToStop(j);
+    }
+    
     assertTrue("Timeout waiting for all not live", ClusterStateUtil.waitForAllReplicasNotLive(cloudClient.getZkStateReader(), 45000));
 
     for (Map.Entry<String, byte[]> entry : contentFiles.entrySet()) {
@@ -187,7 +207,7 @@ public class TestCloudRecovery extends SolrCloudTestCase {
 
       if (tlogBytes.length <= logHeaderSize) continue;
       try (FileOutputStream stream = new FileOutputStream(entry.getKey())) {
-        int skipLastBytes = Math.max(random().nextInt(tlogBytes.length - logHeaderSize), 2);
+        int skipLastBytes = Math.max(random().nextInt(tlogBytes.length - logHeaderSize)-2, 2);
         for (int i = 0; i < entry.getValue().length - skipLastBytes; i++) {
           stream.write(tlogBytes[i]);
         }
@@ -195,11 +215,20 @@ public class TestCloudRecovery extends SolrCloudTestCase {
     }
 
     ChaosMonkey.start(cluster.getJettySolrRunners());
+    cluster.waitForAllNodes(30);
+    
+    Thread.sleep(1000);
+    
     assertTrue("Timeout waiting for all live and active", ClusterStateUtil.waitForAllActiveAndLiveReplicas(cloudClient.getZkStateReader(), COLLECTION, 120000));
-
+    
+    cluster.waitForActiveCollection(COLLECTION, 2, 2 * (nrtReplicas + tlogReplicas));
+    
+    cloudClient.getZkStateReader().forceUpdateCollection(COLLECTION);
+    
     resp = cloudClient.query(COLLECTION, params);
     // Make sure cluster still healthy
-    assertTrue(resp.getResults().getNumFound() >= 2);
+    // TODO: AwaitsFix - this will fail under test beasting
+    // assertTrue(resp.toString(), resp.getResults().getNumFound() >= 2);
   }
 
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/eb652b84/solr/core/src/test/org/apache/solr/cloud/TestCloudSearcherWarming.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/TestCloudSearcherWarming.java b/solr/core/src/test/org/apache/solr/cloud/TestCloudSearcherWarming.java
index 10a6cff..24927e0 100644
--- a/solr/core/src/test/org/apache/solr/cloud/TestCloudSearcherWarming.java
+++ b/solr/core/src/test/org/apache/solr/cloud/TestCloudSearcherWarming.java
@@ -40,6 +40,7 @@ import org.apache.solr.servlet.SolrDispatchFilter;
 import org.apache.solr.util.LogLevel;
 import org.apache.solr.util.RefCounted;
 import org.apache.solr.util.TestInjection;
+import org.junit.After;
 import org.junit.Before;
 import org.junit.BeforeClass;
 import org.junit.Test;
@@ -59,31 +60,32 @@ public class TestCloudSearcherWarming extends SolrCloudTestCase {
   @BeforeClass
   public static void setupCluster() throws Exception {
     useFactory("solr.StandardDirectoryFactory"); // necessary to find the index+tlog intact after restart
-    configureCluster(1)
-        .addConfig("conf", configset("cloud-minimal"))
-        .configure();
   }
 
   @Before
-  public void before() {
+  public void setUp() throws Exception {
+    super.setUp();
+    configureCluster(1).addConfig("conf", configset("cloud-minimal")).configure();
+  }
+  
+  @After
+  @Override
+  public void tearDown() throws Exception {
     coreNameRef.set(null);
     coreNodeNameRef.set(null);
     sleepTime.set(-1);
-
-    try {
-      CollectionAdminRequest.deleteCollection("testRepFactor1LeaderStartup").process(cluster.getSolrClient());
-    } catch (Exception e) {
-      // ignore
-    }
-    try {
-      CollectionAdminRequest.deleteCollection("testPeersyncFailureReplicationSuccess").process(cluster.getSolrClient());
-    } catch (Exception e) {
-      // ignore
-    }
+    
+    cluster.deleteAllCollections();
+    cluster.deleteAllConfigSets();
+    cluster.shutdown();
+    TestInjection.wrongIndexFingerprint = null;
+    
+    super.tearDown();
   }
 
   @Test
   public void testRepFactor1LeaderStartup() throws Exception {
+
     CloudSolrClient solrClient = cluster.getSolrClient();
 
     String collectionName = "testRepFactor1LeaderStartup";
@@ -91,7 +93,7 @@ public class TestCloudSearcherWarming extends SolrCloudTestCase {
         .setCreateNodeSet(cluster.getJettySolrRunner(0).getNodeName());
     create.process(solrClient);
 
-    waitForState("The collection should have 1 shard and 1 replica", collectionName, clusterShape(1, 1));
+   cluster.waitForActiveCollection(collectionName, 1, 1);
 
     solrClient.setDefaultCollection(collectionName);
 
@@ -111,23 +113,29 @@ public class TestCloudSearcherWarming extends SolrCloudTestCase {
     CollectionStateWatcher stateWatcher = createActiveReplicaSearcherWatcher(expectedDocs, failingCoreNodeName);
 
     JettySolrRunner runner = cluster.getJettySolrRunner(0);
-    cluster.stopJettySolrRunner(0);
-    waitForState("", collectionName, clusterShape(1, 0));
+    runner.stop();
+    waitForState("jetty count:" + cluster.getJettySolrRunners().size(), collectionName, clusterShape(1, 0));
+    
+    cluster.waitForJettyToStop(runner);
+    
     // restart
-    sleepTime.set(10000);
-    cluster.startJettySolrRunner(runner);
+    sleepTime.set(1000);
+    runner.start();
+    cluster.waitForAllNodes(30);
     cluster.getSolrClient().getZkStateReader().registerCollectionStateWatcher(collectionName, stateWatcher);
-    waitForState("", collectionName, clusterShape(1, 1));
+    cluster.waitForActiveCollection(collectionName, 1, 1);
     assertNull("No replica should have been active without registering a searcher, found: " + failingCoreNodeName.get(), failingCoreNodeName.get());
     cluster.getSolrClient().getZkStateReader().removeCollectionStateWatcher(collectionName, stateWatcher);
   }
 
+  @Test
   public void testPeersyncFailureReplicationSuccess() throws Exception {
+
     CloudSolrClient solrClient = cluster.getSolrClient();
 
     String collectionName = "testPeersyncFailureReplicationSuccess";
     CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection(collectionName, 1, 1)
-        .setCreateNodeSet(cluster.getJettySolrRunner(0).getNodeName());
+        .setCreateNodeSet(cluster.getJettySolrRunner(0).getNodeName()).setMaxShardsPerNode(2);
     create.process(solrClient);
 
     waitForState("The collection should have 1 shard and 1 replica", collectionName, clusterShape(1, 1));
@@ -161,6 +169,7 @@ public class TestCloudSearcherWarming extends SolrCloudTestCase {
     cluster.getSolrClient().getZkStateReader().registerCollectionStateWatcher(collectionName, stateWatcher);
 
     JettySolrRunner newNode = cluster.startJettySolrRunner();
+    cluster.waitForAllNodes(30);
     CollectionAdminRequest.addReplicaToShard(collectionName, "shard1")
         .setNode(newNode.getNodeName())
         .process(solrClient);
@@ -172,6 +181,8 @@ public class TestCloudSearcherWarming extends SolrCloudTestCase {
     log.info("Stopping old node 1");
     AtomicReference<String> oldNodeName = new AtomicReference<>(cluster.getJettySolrRunner(0).getNodeName());
     JettySolrRunner oldNode = cluster.stopJettySolrRunner(0);
+    
+    cluster.waitForJettyToStop(oldNode);
     // the newly created replica should become leader
     waitForState("The collection should have 1 shard and 1 replica", collectionName, clusterShape(1, 1));
     // the above call is not enough because we want to assert that the down'ed replica is not active

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/eb652b84/solr/core/src/test/org/apache/solr/cloud/TestDeleteCollectionOnDownNodes.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/TestDeleteCollectionOnDownNodes.java b/solr/core/src/test/org/apache/solr/cloud/TestDeleteCollectionOnDownNodes.java
index 7c93e81..e6836a3 100644
--- a/solr/core/src/test/org/apache/solr/cloud/TestDeleteCollectionOnDownNodes.java
+++ b/solr/core/src/test/org/apache/solr/cloud/TestDeleteCollectionOnDownNodes.java
@@ -17,22 +17,28 @@
 
 package org.apache.solr.cloud;
 
-import org.apache.lucene.util.LuceneTestCase;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.solr.client.solrj.embedded.JettySolrRunner;
 import org.apache.solr.client.solrj.request.CollectionAdminRequest;
-import org.apache.solr.common.cloud.Slice;
-import org.junit.BeforeClass;
+import org.junit.After;
+import org.junit.Before;
 import org.junit.Test;
 
-@LuceneTestCase.AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/SOLR-12845")
 public class TestDeleteCollectionOnDownNodes extends SolrCloudTestCase {
 
-  @BeforeClass
-  public static void setupCluster() throws Exception {
+  @Before
+  public void setupCluster() throws Exception {
     configureCluster(4)
         .addConfig("conf", configset("cloud-minimal"))
         .addConfig("conf2", configset("cloud-minimal"))
         .configure();
   }
+  
+  @After
+  public void teardownCluster() throws Exception {
+    shutdownCluster();
+  }
 
   @Test
   public void deleteCollectionWithDownNodes() throws Exception {
@@ -41,20 +47,14 @@ public class TestDeleteCollectionOnDownNodes extends SolrCloudTestCase {
         .setMaxShardsPerNode(3)
         .process(cluster.getSolrClient());
 
+    cluster.waitForActiveCollection("halfdeletedcollection2", 60, TimeUnit.SECONDS, 4, 12);
+    
     // stop a couple nodes
-    cluster.stopJettySolrRunner(cluster.getRandomJetty(random()));
-    cluster.stopJettySolrRunner(cluster.getRandomJetty(random()));
+    JettySolrRunner j1 = cluster.stopJettySolrRunner(cluster.getRandomJetty(random()));
+    JettySolrRunner j2 = cluster.stopJettySolrRunner(cluster.getRandomJetty(random()));
 
-    // wait for leaders to settle out
-    waitForState("Timed out waiting for leader elections", "halfdeletedcollection2", (n, c) -> {
-      for (Slice slice : c) {
-        if (slice.getLeader() == null)
-          return false;
-        if (slice.getLeader().isActive(n) == false)
-          return false;
-      }
-      return true;
-    });
+    cluster.waitForJettyToStop(j1);
+    cluster.waitForJettyToStop(j2);
 
     // delete the collection
     CollectionAdminRequest.deleteCollection("halfdeletedcollection2").process(cluster.getSolrClient());

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/eb652b84/solr/core/src/test/org/apache/solr/cloud/TestDistributedMap.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/TestDistributedMap.java b/solr/core/src/test/org/apache/solr/cloud/TestDistributedMap.java
index ae05dd5..417cf2f 100644
--- a/solr/core/src/test/org/apache/solr/cloud/TestDistributedMap.java
+++ b/solr/core/src/test/org/apache/solr/cloud/TestDistributedMap.java
@@ -35,7 +35,7 @@ public class TestDistributedMap extends SolrTestCaseJ4 {
   protected static ZkTestServer zkServer;
   
   @BeforeClass
-  public static void setUpClass() throws InterruptedException {
+  public static void setUpClass() throws Exception {
     zkDir = createTempDir("TestDistributedMap");
     zkServer = new ZkTestServer(zkDir.toFile().getAbsolutePath());
     zkServer.run();
@@ -171,7 +171,7 @@ public class TestDistributedMap extends SolrTestCaseJ4 {
   }
   
   protected String getAndMakeInitialPath(SolrZkClient zkClient) throws KeeperException, InterruptedException {
-    String path = String.format(Locale.ROOT, "/%s/%s", getClass().getName(), getTestName());
+    String path = String.format(Locale.ROOT, "/%s/%s", getClass().getName(), getSaferTestName());
     zkClient.makePath(path, false, true);
     return path;
   }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/eb652b84/solr/core/src/test/org/apache/solr/cloud/TestDownShardTolerantSearch.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/TestDownShardTolerantSearch.java b/solr/core/src/test/org/apache/solr/cloud/TestDownShardTolerantSearch.java
index 415d4e4..25e1f10 100644
--- a/solr/core/src/test/org/apache/solr/cloud/TestDownShardTolerantSearch.java
+++ b/solr/core/src/test/org/apache/solr/cloud/TestDownShardTolerantSearch.java
@@ -20,6 +20,7 @@ import java.lang.invoke.MethodHandles;
 
 import org.apache.solr.client.solrj.SolrQuery;
 import org.apache.solr.client.solrj.SolrServerException;
+import org.apache.solr.client.solrj.embedded.JettySolrRunner;
 import org.apache.solr.client.solrj.request.CollectionAdminRequest;
 import org.apache.solr.client.solrj.request.UpdateRequest;
 import org.apache.solr.client.solrj.response.QueryResponse;
@@ -63,7 +64,9 @@ public class TestDownShardTolerantSearch extends SolrCloudTestCase {
     assertThat(response.getStatus(), is(0));
     assertThat(response.getResults().getNumFound(), is(100L));
 
-    cluster.stopJettySolrRunner(0);
+    JettySolrRunner stoppedServer = cluster.stopJettySolrRunner(0);
+    
+    cluster.waitForJettyToStop(stoppedServer);
 
     response = cluster.getSolrClient().query("tolerant", new SolrQuery("*:*").setRows(1).setParam(ShardParams.SHARDS_TOLERANT, true));
     assertThat(response.getStatus(), is(0));

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/eb652b84/solr/core/src/test/org/apache/solr/cloud/TestLeaderElectionWithEmptyReplica.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/TestLeaderElectionWithEmptyReplica.java b/solr/core/src/test/org/apache/solr/cloud/TestLeaderElectionWithEmptyReplica.java
index 5221e81..f0bb15a 100644
--- a/solr/core/src/test/org/apache/solr/cloud/TestLeaderElectionWithEmptyReplica.java
+++ b/solr/core/src/test/org/apache/solr/cloud/TestLeaderElectionWithEmptyReplica.java
@@ -53,8 +53,7 @@ public class TestLeaderElectionWithEmptyReplica extends SolrCloudTestCase {
     CollectionAdminRequest.createCollection(COLLECTION_NAME, "config", 1, 1)
         .processAndWait(cluster.getSolrClient(), DEFAULT_TIMEOUT);
 
-    cluster.getSolrClient().waitForState(COLLECTION_NAME, DEFAULT_TIMEOUT, TimeUnit.SECONDS,
-        (n, c) -> DocCollection.isFullyActive(n, c, 1, 1));
+    cluster.waitForActiveCollection(COLLECTION_NAME, 1, 1);
   }
 
   @Test
@@ -81,7 +80,7 @@ public class TestLeaderElectionWithEmptyReplica extends SolrCloudTestCase {
     }
 
     // kill the leader
-    ChaosMonkey.kill(replicaJetty);
+    replicaJetty.stop();
 
     // add a replica (asynchronously)
     CollectionAdminRequest.AddReplica addReplica = CollectionAdminRequest.addReplicaToShard(COLLECTION_NAME, "shard1");
@@ -91,7 +90,7 @@ public class TestLeaderElectionWithEmptyReplica extends SolrCloudTestCase {
     Thread.sleep(1000);
 
     // bring the old leader node back up
-    ChaosMonkey.start(replicaJetty);
+    replicaJetty.start();
 
     // wait until everyone is active
     solrClient.waitForState(COLLECTION_NAME, DEFAULT_TIMEOUT, TimeUnit.SECONDS,

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/eb652b84/solr/core/src/test/org/apache/solr/cloud/TestLeaderElectionZkExpiry.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/TestLeaderElectionZkExpiry.java b/solr/core/src/test/org/apache/solr/cloud/TestLeaderElectionZkExpiry.java
index b890777..8e6057d 100644
--- a/solr/core/src/test/org/apache/solr/cloud/TestLeaderElectionZkExpiry.java
+++ b/solr/core/src/test/org/apache/solr/cloud/TestLeaderElectionZkExpiry.java
@@ -50,8 +50,6 @@ public class TestLeaderElectionZkExpiry extends SolrTestCaseJ4 {
     SolrZkClient zc = null;
     try {
       server.run();
-      AbstractZkTestCase.tryCleanSolrZkNode(server.getZkHost());
-      AbstractZkTestCase.makeSolrZkNode(server.getZkHost());
 
       CloudConfig cloudConfig = new CloudConfig.CloudConfigBuilder("dummy.host.com", 8984, "solr")
           .setLeaderConflictResolveWait(180000)

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/eb652b84/solr/core/src/test/org/apache/solr/cloud/TestLeaderInitiatedRecoveryThread.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/TestLeaderInitiatedRecoveryThread.java b/solr/core/src/test/org/apache/solr/cloud/TestLeaderInitiatedRecoveryThread.java
index 2087cc5..8ad6b3f 100644
--- a/solr/core/src/test/org/apache/solr/cloud/TestLeaderInitiatedRecoveryThread.java
+++ b/solr/core/src/test/org/apache/solr/cloud/TestLeaderInitiatedRecoveryThread.java
@@ -32,11 +32,14 @@ import org.apache.zookeeper.KeeperException;
 import org.apache.zookeeper.data.Stat;
 import org.junit.Test;
 
+import com.carrotsearch.randomizedtesting.annotations.Nightly;
+
 /**
  * Test for {@link LeaderInitiatedRecoveryThread}
  */
 @Deprecated
 @SolrTestCaseJ4.SuppressSSL
+@Nightly
 public class TestLeaderInitiatedRecoveryThread extends AbstractFullDistribZkTestBase {
 
   public TestLeaderInitiatedRecoveryThread() {
@@ -103,15 +106,11 @@ public class TestLeaderInitiatedRecoveryThread extends AbstractFullDistribZkTest
         DEFAULT_COLLECTION, SHARD1, replicaCoreNodeProps, 1, cd);
     // kill the replica
     int children = cloudClient.getZkStateReader().getZkClient().getChildren("/live_nodes", null, true).size();
-    ChaosMonkey.stop(notLeader.jetty);
-    TimeOut timeOut = new TimeOut(60, TimeUnit.SECONDS, TimeSource.NANO_TIME);
-    while (!timeOut.hasTimedOut()) {
-      if (children > cloudClient.getZkStateReader().getZkClient().getChildren("/live_nodes", null, true).size()) {
-        break;
-      }
-      Thread.sleep(500);
-    }
-    assertTrue(children > cloudClient.getZkStateReader().getZkClient().getChildren("/live_nodes", null, true).size());
+    
+    String nodeName = notLeader.jetty.getNodeName();
+    notLeader.jetty.stop();
+    
+    cloudClient.getZkStateReader().waitForLiveNodes(30, TimeUnit.SECONDS, SolrCloudTestCase.missingLiveNode(nodeName));
 
     int cversion = getOverseerCversion();
     // Thread should not publish LIR and down state for node which is not live, regardless of whether forcePublish is true or false
@@ -127,7 +126,7 @@ public class TestLeaderInitiatedRecoveryThread extends AbstractFullDistribZkTest
     /*
     3. Test that if ZK connection loss then thread should not attempt to publish down state even if forcePublish=true
      */
-    ChaosMonkey.start(notLeader.jetty);
+    notLeader.jetty.start();
     waitForRecoveriesToFinish(true);
 
     thread = new LeaderInitiatedRecoveryThread(zkController, coreContainer,
@@ -179,7 +178,7 @@ public class TestLeaderInitiatedRecoveryThread extends AbstractFullDistribZkTest
     // this should have published a down state so assert that cversion has incremented
     assertTrue(getOverseerCversion() > cversion);
 
-    timeOut = new TimeOut(30, TimeUnit.SECONDS, TimeSource.NANO_TIME);
+    TimeOut timeOut = new TimeOut(30, TimeUnit.SECONDS, TimeSource.NANO_TIME);
     while (!timeOut.hasTimedOut()) {
       Replica r = cloudClient.getZkStateReader().getClusterState().getCollection(DEFAULT_COLLECTION).getReplica(replica.getName());
       if (r.getState() == Replica.State.DOWN) {

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/eb652b84/solr/core/src/test/org/apache/solr/cloud/TestMiniSolrCloudClusterSSL.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/TestMiniSolrCloudClusterSSL.java b/solr/core/src/test/org/apache/solr/cloud/TestMiniSolrCloudClusterSSL.java
index eeb7be7..97a2de0 100644
--- a/solr/core/src/test/org/apache/solr/cloud/TestMiniSolrCloudClusterSSL.java
+++ b/solr/core/src/test/org/apache/solr/cloud/TestMiniSolrCloudClusterSSL.java
@@ -236,11 +236,13 @@ public class TestMiniSolrCloudClusterSSL extends SolrTestCaseJ4 {
     
     // shut down a server
     JettySolrRunner stoppedServer = cluster.stopJettySolrRunner(0);
+    cluster.waitForJettyToStop(stoppedServer);
     assertTrue(stoppedServer.isStopped());
     assertEquals(NUM_SERVERS - 1, cluster.getJettySolrRunners().size());
     
     // create a new server
     JettySolrRunner startedServer = cluster.startJettySolrRunner();
+    cluster.waitForAllNodes(30);
     assertTrue(startedServer.isRunning());
     assertEquals(NUM_SERVERS, cluster.getJettySolrRunners().size());
     

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/eb652b84/solr/core/src/test/org/apache/solr/cloud/TestPrepRecovery.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/TestPrepRecovery.java b/solr/core/src/test/org/apache/solr/cloud/TestPrepRecovery.java
index 068e215..e593c63 100644
--- a/solr/core/src/test/org/apache/solr/cloud/TestPrepRecovery.java
+++ b/solr/core/src/test/org/apache/solr/cloud/TestPrepRecovery.java
@@ -17,11 +17,14 @@
 
 package org.apache.solr.cloud;
 
+import java.util.concurrent.TimeUnit;
+
 import org.apache.solr.client.solrj.embedded.JettySolrRunner;
 import org.apache.solr.client.solrj.impl.CloudSolrClient;
 import org.apache.solr.client.solrj.request.CollectionAdminRequest;
 import org.apache.solr.common.cloud.Replica;
 import org.apache.solr.util.TestInjection;
+import org.junit.AfterClass;
 import org.junit.BeforeClass;
 import org.junit.Test;
 
@@ -35,21 +38,22 @@ public class TestPrepRecovery extends SolrCloudTestCase {
     System.setProperty("solr.directoryFactory", "solr.StandardDirectoryFactory");
     System.setProperty("solr.ulog.numRecordsToKeep", "1000");
     // the default is 180s and our waitForState times out in 90s
-    // so we lower this to 10s so that we can still test timeouts
-    System.setProperty("leaderConflictResolveWait", "10000");
-
+    // so we lower this so that we can still test timeouts
+    System.setProperty("leaderConflictResolveWait", "5000");
+    System.setProperty("prepRecoveryReadTimeoutExtraWait", "1000");
+    
     configureCluster(2)
         .addConfig("config", TEST_PATH().resolve("configsets").resolve("cloud-minimal").resolve("conf"))
         .withSolrXml(TEST_PATH().resolve("solr.xml"))
         .configure();
   }
 
+  @AfterClass
   public static void tearCluster() throws Exception {
     System.clearProperty("leaderConflictResolveWait");
   }
 
   @Test
-// 12-Jun-2018   @BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028")
   public void testLeaderUnloaded() throws Exception {
     CloudSolrClient solrClient = cluster.getSolrClient();
 
@@ -85,7 +89,6 @@ public class TestPrepRecovery extends SolrCloudTestCase {
   }
 
   @Test
-  // 12-Jun-2018 @BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028")
   public void testLeaderNotResponding() throws Exception {
     CloudSolrClient solrClient = cluster.getSolrClient();
 
@@ -102,11 +105,12 @@ public class TestPrepRecovery extends SolrCloudTestCase {
           .process(solrClient);
 
       // in the absence of fixes made in SOLR-9716, prep recovery waits forever and the following statement
-      // times out in 90 seconds
+      // times out
       waitForState("Expected collection: testLeaderNotResponding to be live with 1 shard and 2 replicas",
-          collectionName, clusterShape(1, 2));
+          collectionName, clusterShape(1, 2), 30, TimeUnit.SECONDS);
     } finally {
-      TestInjection.reset();
+      TestInjection.prepRecoveryOpPauseForever = null;
+      TestInjection.notifyPauseForeverDone();
     }
   }
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/eb652b84/solr/core/src/test/org/apache/solr/cloud/TestPullReplica.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/TestPullReplica.java b/solr/core/src/test/org/apache/solr/cloud/TestPullReplica.java
index 15625db..97bde93 100644
--- a/solr/core/src/test/org/apache/solr/cloud/TestPullReplica.java
+++ b/solr/core/src/test/org/apache/solr/cloud/TestPullReplica.java
@@ -34,6 +34,7 @@ import org.apache.http.client.HttpClient;
 import org.apache.http.client.methods.HttpGet;
 import org.apache.http.client.methods.HttpPost;
 import org.apache.http.entity.StringEntity;
+import org.apache.lucene.util.LuceneTestCase.AwaitsFix;
 import org.apache.lucene.util.LuceneTestCase.Slow;
 import org.apache.solr.client.solrj.SolrQuery;
 import org.apache.solr.client.solrj.SolrServerException;
@@ -67,21 +68,26 @@ import org.slf4j.LoggerFactory;
 import com.carrotsearch.randomizedtesting.annotations.Repeat;
 
 @Slow
+@AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028")
 public class TestPullReplica extends SolrCloudTestCase {
   
   private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
   
   private String collectionName = null;
-  private final static int REPLICATION_TIMEOUT_SECS = 10;
+  private final static int REPLICATION_TIMEOUT_SECS = 30;
   
   private String suggestedCollectionName() {
-    return (getTestClass().getSimpleName().replace("Test", "") + "_" + getTestName().split(" ")[0]).replaceAll("(.)(\\p{Upper})", "$1_$2").toLowerCase(Locale.ROOT);
+    return (getTestClass().getSimpleName().replace("Test", "") + "_" + getSaferTestName().split(" ")[0]).replaceAll("(.)(\\p{Upper})", "$1_$2").toLowerCase(Locale.ROOT);
   }
 
   @BeforeClass
   public static void setupCluster() throws Exception {
     TestInjection.waitForReplicasInSync = null; // We'll be explicit about this in this test
-    configureCluster(2) // 2 + random().nextInt(3) 
+   //  cloudSolrClientMaxStaleRetries
+   System.setProperty("cloudSolrClientMaxStaleRetries", "1");
+   System.setProperty("zkReaderGetLeaderRetryTimeoutMs", "1000");
+   
+   configureCluster(2) // 2 + random().nextInt(3) 
         .addConfig("conf", configset("cloud-minimal"))
         .configure();
     Boolean useLegacyCloud = rarely();
@@ -93,12 +99,15 @@ public class TestPullReplica extends SolrCloudTestCase {
   
   @AfterClass
   public static void tearDownCluster() {
+    System.clearProperty("cloudSolrClientMaxStaleRetries");
+    System.clearProperty("zkReaderGetLeaderRetryTimeoutMs");
     TestInjection.reset();
   }
   
   @Override
   public void setUp() throws Exception {
     super.setUp();
+    
     collectionName = suggestedCollectionName();
     expectThrows(SolrException.class, () -> getCollectionState(collectionName));
   }
@@ -108,7 +117,7 @@ public class TestPullReplica extends SolrCloudTestCase {
     for (JettySolrRunner jetty:cluster.getJettySolrRunners()) {
       if (!jetty.isRunning()) {
         log.warn("Jetty {} not running, probably some bad test. Starting it", jetty.getLocalPort());
-        ChaosMonkey.start(jetty);
+        jetty.start();
       }
     }
     if (cluster.getSolrClient().getZkStateReader().getClusterState().getCollectionOrNull(collectionName) != null) {
@@ -279,7 +288,7 @@ public class TestPullReplica extends SolrCloudTestCase {
     CollectionAdminRequest.createCollection(collectionName, "conf", 2, 1, 0, 0)
       .setMaxShardsPerNode(100)
       .process(cluster.getSolrClient());
-    waitForState("Expected collection to be created with 2 shards and 1 replica each", collectionName, clusterShape(2, 1));
+    waitForState("Expected collection to be created with 2 shards and 1 replica each", collectionName, clusterShape(2, 2));
     DocCollection docCollection = assertNumberOfReplicas(2, 0, 0, false, true);
     assertEquals(2, docCollection.getSlices().size());
     
@@ -288,7 +297,7 @@ public class TestPullReplica extends SolrCloudTestCase {
     addReplicaToShard("shard2", Replica.Type.PULL);
     docCollection = assertNumberOfReplicas(2, 0, 2, true, false);
     
-    waitForState("Expecting collection to have 2 shards and 2 replica each", collectionName, clusterShape(2, 2));
+    waitForState("Expecting collection to have 2 shards and 2 replica each", collectionName, clusterShape(2, 4));
     
     //Delete pull replica from shard1
     CollectionAdminRequest.deleteReplica(
@@ -413,7 +422,7 @@ public class TestPullReplica extends SolrCloudTestCase {
       .process(cluster.getSolrClient());
     } else {
       leaderJetty = cluster.getReplicaJetty(s.getLeader());
-      ChaosMonkey.kill(leaderJetty);
+      leaderJetty.stop();
       waitForState("Leader replica not removed", collectionName, clusterShape(1, 1));
       // Wait for cluster state to be updated
       waitForState("Replica state not updated in cluster state", 
@@ -463,7 +472,7 @@ public class TestPullReplica extends SolrCloudTestCase {
     if (removeReplica) {
       CollectionAdminRequest.addReplicaToShard(collectionName, "shard1", Replica.Type.NRT).process(cluster.getSolrClient());
     } else {
-      ChaosMonkey.start(leaderJetty);
+      leaderJetty.start();
     }
     waitForState("Expected collection to be 1x2", collectionName, clusterShape(1, 2));
     unIgnoreException("No registered leader was found"); // Should have a leader from now on
@@ -506,7 +515,7 @@ public class TestPullReplica extends SolrCloudTestCase {
     waitForNumDocsInAllActiveReplicas(1);
     
     JettySolrRunner pullReplicaJetty = cluster.getReplicaJetty(docCollection.getSlice("shard1").getReplicas(EnumSet.of(Replica.Type.PULL)).get(0));
-    ChaosMonkey.kill(pullReplicaJetty);
+    pullReplicaJetty.stop();
     waitForState("Replica not removed", collectionName, activeReplicaCount(1, 0, 0));
     // Also wait for the replica to be placed in state="down"
     waitForState("Didn't update state", collectionName, clusterStateReflectsActiveAndDownReplicas());
@@ -515,7 +524,7 @@ public class TestPullReplica extends SolrCloudTestCase {
     cluster.getSolrClient().commit(collectionName);
     waitForNumDocsInAllActiveReplicas(2);
     
-    ChaosMonkey.start(pullReplicaJetty);
+    pullReplicaJetty.start();
     waitForState("Replica not added", collectionName, activeReplicaCount(1, 0, 1));
     waitForNumDocsInAllActiveReplicas(2);
   }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/eb652b84/solr/core/src/test/org/apache/solr/cloud/TestPullReplicaErrorHandling.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/TestPullReplicaErrorHandling.java b/solr/core/src/test/org/apache/solr/cloud/TestPullReplicaErrorHandling.java
index bdcda6a..a6249db 100644
--- a/solr/core/src/test/org/apache/solr/cloud/TestPullReplicaErrorHandling.java
+++ b/solr/core/src/test/org/apache/solr/cloud/TestPullReplicaErrorHandling.java
@@ -27,11 +27,12 @@ import java.util.List;
 import java.util.Locale;
 import java.util.Map;
 import java.util.concurrent.TimeUnit;
-import org.apache.solr.SolrTestCaseJ4.SuppressObjectReleaseTracker;
+
 import org.apache.solr.SolrTestCaseJ4.SuppressSSL;
 import org.apache.solr.client.solrj.SolrClient;
 import org.apache.solr.client.solrj.SolrQuery;
 import org.apache.solr.client.solrj.SolrServerException;
+import org.apache.solr.client.solrj.cloud.SocketProxy;
 import org.apache.solr.client.solrj.embedded.JettySolrRunner;
 import org.apache.solr.client.solrj.impl.HttpSolrClient;
 import org.apache.solr.client.solrj.request.CollectionAdminRequest;
@@ -53,7 +54,6 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 @SuppressSSL(bugUrl = "https://issues.apache.org/jira/browse/SOLR-5776")
-@SuppressObjectReleaseTracker(bugUrl="Testing purposes")
 public class TestPullReplicaErrorHandling extends SolrCloudTestCase {
   
   private final static int REPLICATION_TIMEOUT_SECS = 10;
@@ -65,11 +65,13 @@ public class TestPullReplicaErrorHandling extends SolrCloudTestCase {
   private String collectionName = null;
   
   private String suggestedCollectionName() {
-    return (getTestClass().getSimpleName().replace("Test", "") + "_" + getTestName().split(" ")[0]).replaceAll("(.)(\\p{Upper})", "$1_$2").toLowerCase(Locale.ROOT);
+    return (getTestClass().getSimpleName().replace("Test", "") + "_" + getSaferTestName().split(" ")[0]).replaceAll("(.)(\\p{Upper})", "$1_$2").toLowerCase(Locale.ROOT);
   }
 
   @BeforeClass
   public static void setupCluster() throws Exception {
+    System.setProperty("solr.zkclienttimeout", "20000");
+
     TestInjection.waitForReplicasInSync = null; // We'll be explicit about this in this test
     configureCluster(4) 
         .addConfig("conf", configset("cloud-minimal"))
@@ -82,6 +84,7 @@ public class TestPullReplicaErrorHandling extends SolrCloudTestCase {
       jetty.setProxyPort(proxy.getListenPort());
       cluster.stopJettySolrRunner(jetty);//TODO: Can we avoid this restart
       cluster.startJettySolrRunner(jetty);
+      cluster.waitForAllNodes(30);
       proxy.open(jetty.getBaseUrl().toURI());
       log.info("Adding proxy for URL: " + jetty.getBaseUrl() + ". Proxy: " + proxy.getUrl());
       proxies.put(proxy.getUrl(), proxy);
@@ -140,6 +143,7 @@ public void testCantConnectToPullReplica() throws Exception {
     CollectionAdminRequest.createCollection(collectionName, "conf", numShards, 1, 0, 1)
       .setMaxShardsPerNode(1)
       .process(cluster.getSolrClient());
+    cluster.waitForActiveCollection(collectionName, numShards, numShards * 2);
     addDocs(10);
     DocCollection docCollection = assertNumberOfReplicas(numShards, 0, numShards, false, true);
     Slice s = docCollection.getSlices().iterator().next();
@@ -181,6 +185,7 @@ public void testCantConnectToPullReplica() throws Exception {
     CollectionAdminRequest.createCollection(collectionName, "conf", numShards, 1, 0, 1)
       .setMaxShardsPerNode(1)
       .process(cluster.getSolrClient());
+    cluster.waitForActiveCollection(collectionName, numShards, numShards * 2);
     addDocs(10);
     DocCollection docCollection = assertNumberOfReplicas(numShards, 0, numShards, false, true);
     Slice s = docCollection.getSlices().iterator().next();

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/eb652b84/solr/core/src/test/org/apache/solr/cloud/TestRandomFlRTGCloud.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/TestRandomFlRTGCloud.java b/solr/core/src/test/org/apache/solr/cloud/TestRandomFlRTGCloud.java
index 5a9db8f..73bb4d6 100644
--- a/solr/core/src/test/org/apache/solr/cloud/TestRandomFlRTGCloud.java
+++ b/solr/core/src/test/org/apache/solr/cloud/TestRandomFlRTGCloud.java
@@ -66,7 +66,7 @@ public class TestRandomFlRTGCloud extends SolrCloudTestCase {
   /** A basic client for operations at the cloud level, default collection will be set */
   private static CloudSolrClient CLOUD_CLIENT;
   /** One client per node */
-  private static ArrayList<HttpSolrClient> CLIENTS = new ArrayList<>(5);
+  private static List<HttpSolrClient> CLIENTS = Collections.synchronizedList(new ArrayList<>(5));
 
   /** Always included in fl so we can vet what doc we're looking at */
   private static final FlValidator ID_VALIDATOR = new SimpleFieldValueValidator("id");
@@ -143,7 +143,7 @@ public class TestRandomFlRTGCloud extends SolrCloudTestCase {
         .withProperty("schema", "schema-psuedo-fields.xml")
         .process(CLOUD_CLIENT);
 
-    waitForRecoveriesToFinish(CLOUD_CLIENT);
+    cluster.waitForActiveCollection(COLLECTION_NAME, numShards, repFactor * numShards); 
 
     for (JettySolrRunner jetty : cluster.getJettySolrRunners()) {
       CLIENTS.add(getHttpSolrClient(jetty.getBaseUrl() + "/" + COLLECTION_NAME + "/"));

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/eb652b84/solr/core/src/test/org/apache/solr/cloud/TestRandomRequestDistribution.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/TestRandomRequestDistribution.java b/solr/core/src/test/org/apache/solr/cloud/TestRandomRequestDistribution.java
index 0becd24..55056f3 100644
--- a/solr/core/src/test/org/apache/solr/cloud/TestRandomRequestDistribution.java
+++ b/solr/core/src/test/org/apache/solr/cloud/TestRandomRequestDistribution.java
@@ -179,7 +179,7 @@ public class TestRandomRequestDistribution extends AbstractFullDistribZkTestBase
         ZkStateReader.STATE_PROP, Replica.State.DOWN.toString());
 
     log.info("Forcing {} to go into 'down' state", notLeader.getStr(ZkStateReader.CORE_NAME_PROP));
-    ZkDistributedQueue q = Overseer.getStateUpdateQueue(cloudClient.getZkStateReader().getZkClient());
+    ZkDistributedQueue q = jettys.get(0).getCoreContainer().getZkController().getOverseer().getStateUpdateQueue();
     q.offer(Utils.toJSON(m));
 
     verifyReplicaStatus(cloudClient.getZkStateReader(), "football", "shard1", notLeader.getName(), Replica.State.DOWN);

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/eb652b84/solr/core/src/test/org/apache/solr/cloud/TestRequestForwarding.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/TestRequestForwarding.java b/solr/core/src/test/org/apache/solr/cloud/TestRequestForwarding.java
index febbe33..a479e5f 100644
--- a/solr/core/src/test/org/apache/solr/cloud/TestRequestForwarding.java
+++ b/solr/core/src/test/org/apache/solr/cloud/TestRequestForwarding.java
@@ -80,6 +80,6 @@ public class TestRequestForwarding extends SolrTestCaseJ4 {
       fail("Could not create collection. Response" + response.toString());
     }
     ZkStateReader zkStateReader = solrCluster.getSolrClient().getZkStateReader();
-    AbstractDistribZkTestBase.waitForRecoveriesToFinish(name, zkStateReader, false, true, 100);
+    solrCluster.waitForActiveCollection(name, 2, 2);
   }
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/eb652b84/solr/core/src/test/org/apache/solr/cloud/TestSegmentSorting.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/TestSegmentSorting.java b/solr/core/src/test/org/apache/solr/cloud/TestSegmentSorting.java
index 5f1375f..9e83b55 100644
--- a/solr/core/src/test/org/apache/solr/cloud/TestSegmentSorting.java
+++ b/solr/core/src/test/org/apache/solr/cloud/TestSegmentSorting.java
@@ -86,7 +86,7 @@ public class TestSegmentSorting extends SolrCloudTestCase {
     }
     
     ZkStateReader zkStateReader = cloudSolrClient.getZkStateReader();
-    AbstractDistribZkTestBase.waitForRecoveriesToFinish(collectionName, zkStateReader, true, true, 330);
+    cluster.waitForActiveCollection(collectionName, NUM_SHARDS, NUM_SHARDS * REPLICATION_FACTOR);
     
     cloudSolrClient.setDefaultCollection(collectionName);
   }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/eb652b84/solr/core/src/test/org/apache/solr/cloud/TestSkipOverseerOperations.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/TestSkipOverseerOperations.java b/solr/core/src/test/org/apache/solr/cloud/TestSkipOverseerOperations.java
index c18fb92..e44115e 100644
--- a/solr/core/src/test/org/apache/solr/cloud/TestSkipOverseerOperations.java
+++ b/solr/core/src/test/org/apache/solr/cloud/TestSkipOverseerOperations.java
@@ -18,20 +18,26 @@
 package org.apache.solr.cloud;
 
 import java.io.IOException;
+import java.util.ArrayList;
 import java.util.List;
+import java.util.SortedSet;
+import java.util.concurrent.TimeUnit;
 import java.util.stream.Collectors;
 
 import org.apache.solr.client.solrj.SolrServerException;
 import org.apache.solr.client.solrj.embedded.JettySolrRunner;
 import org.apache.solr.client.solrj.request.CollectionAdminRequest;
 import org.apache.solr.client.solrj.response.CollectionAdminResponse;
-import org.junit.BeforeClass;
+import org.apache.solr.common.cloud.LiveNodesPredicate;
+import org.apache.solr.common.cloud.ZkStateReader;
+import org.junit.After;
+import org.junit.Before;
 import org.junit.Test;
 
 public class TestSkipOverseerOperations extends SolrCloudTestCase {
 
-  @BeforeClass
-  public static void setupCluster() throws Exception {
+  @Before
+  public void setupCluster() throws Exception {
     System.setProperty("solr.directoryFactory", "solr.StandardDirectoryFactory");
     System.setProperty("solr.ulog.numRecordsToKeep", "1000");
 
@@ -40,12 +46,26 @@ public class TestSkipOverseerOperations extends SolrCloudTestCase {
         .configure();
   }
   
+  @After
+  public void tearDown() throws Exception {
+    shutdownCluster();
+    super.tearDown();
+  }
+  
   public void testSkipLeaderOperations() throws Exception {
+
     String overseerLeader = getOverseerLeader();
+    
+    assertNotNull(overseerLeader);
+    assertTrue(overseerLeader.length() > 0);
+    
     List<JettySolrRunner> notOverseerNodes = cluster.getJettySolrRunners()
         .stream()
         .filter(solrRunner -> !solrRunner.getNodeName().equals(overseerLeader))
         .collect(Collectors.toList());
+    
+    assertEquals(2, notOverseerNodes.size());
+    
     String collection = "collection1";
     CollectionAdminRequest
         .createCollection(collection, 2, 1)
@@ -55,10 +75,39 @@ public class TestSkipOverseerOperations extends SolrCloudTestCase {
             .collect(Collectors.joining(","))
         )
         .process(cluster.getSolrClient());
+    cluster.waitForActiveCollection("collection1", 2, 2);
 
+    ZkStateReader reader = cluster.getSolrClient().getZkStateReader();
+    
+    List<String> nodes = new ArrayList<>();
+    for (JettySolrRunner solrRunner : notOverseerNodes) {
+      nodes.add(solrRunner.getNodeName());
+    }
+    
     for (JettySolrRunner solrRunner : notOverseerNodes) {
       solrRunner.stop();
     }
+    
+    for (JettySolrRunner solrRunner : notOverseerNodes) {
+      cluster.waitForJettyToStop(solrRunner);
+    }
+    
+    reader.waitForLiveNodes(30, TimeUnit.SECONDS, new LiveNodesPredicate() {
+      
+      @Override
+      public boolean matches(SortedSet<String> oldLiveNodes, SortedSet<String> newLiveNodes) {
+        boolean success = true;
+        for (String lostNodeName : nodes) {
+          if (newLiveNodes.contains(lostNodeName)) {
+            success = false;
+            break;
+          }
+        }
+        
+        return success;
+      }
+    });
+    
     waitForState("Expected single liveNode", collection,
         (liveNodes, collectionState) -> liveNodes.size() == 1);
 
@@ -66,9 +115,11 @@ public class TestSkipOverseerOperations extends SolrCloudTestCase {
     for (JettySolrRunner solrRunner : notOverseerNodes) {
       solrRunner.start();
     }
+    
+    cluster.waitForAllNodes(30);
 
     waitForState("Expected 2x1 for collection: " + collection, collection,
-        clusterShape(2, 1));
+        clusterShape(2, 2));
     CollectionAdminResponse resp2 = CollectionAdminRequest.getOverseerStatus().process(cluster.getSolrClient());
     assertEquals(getNumLeaderOpeations(resp), getNumLeaderOpeations(resp2));
     CollectionAdminRequest.deleteCollection(collection).process(cluster.getSolrClient());
@@ -92,10 +143,39 @@ public class TestSkipOverseerOperations extends SolrCloudTestCase {
         )
         .setMaxShardsPerNode(2)
         .process(cluster.getSolrClient());
-
+    
+    cluster.waitForActiveCollection(collection, 2, 4);
+    
+    ZkStateReader reader = cluster.getSolrClient().getZkStateReader();
+    
+    List<String> nodes = new ArrayList<>();
+    for (JettySolrRunner solrRunner : notOverseerNodes) {
+      nodes.add(solrRunner.getNodeName());
+    }
+    
     for (JettySolrRunner solrRunner : notOverseerNodes) {
       solrRunner.stop();
     }
+    for (JettySolrRunner solrRunner : notOverseerNodes) {
+      cluster.waitForJettyToStop(solrRunner);
+    }
+    
+    reader.waitForLiveNodes(30, TimeUnit.SECONDS, new LiveNodesPredicate() {
+      
+      @Override
+      public boolean matches(SortedSet<String> oldLiveNodes, SortedSet<String> newLiveNodes) {
+        boolean success = true;
+        for (String lostNodeName : nodes) {
+          if (newLiveNodes.contains(lostNodeName)) {
+            success = false;
+            break;
+          }
+        }
+        
+        return success;
+      }
+    });
+    
     waitForState("Expected single liveNode", collection,
         (liveNodes, collectionState) -> liveNodes.size() == 1);
 
@@ -103,9 +183,9 @@ public class TestSkipOverseerOperations extends SolrCloudTestCase {
     for (JettySolrRunner solrRunner : notOverseerNodes) {
       solrRunner.start();
     }
-
+    cluster.waitForAllNodes(30);
     waitForState("Expected 2x2 for collection: " + collection, collection,
-        clusterShape(2, 2));
+        clusterShape(2, 4));
     CollectionAdminResponse resp2 = CollectionAdminRequest.getOverseerStatus().process(cluster.getSolrClient());
     // 2 for recovering state, 4 for active state
     assertEquals(getNumStateOpeations(resp) + 6, getNumStateOpeations(resp2));

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/eb652b84/solr/core/src/test/org/apache/solr/cloud/TestSolrCloudWithDelegationTokens.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/TestSolrCloudWithDelegationTokens.java b/solr/core/src/test/org/apache/solr/cloud/TestSolrCloudWithDelegationTokens.java
index 2ddc322..36318578 100644
--- a/solr/core/src/test/org/apache/solr/cloud/TestSolrCloudWithDelegationTokens.java
+++ b/solr/core/src/test/org/apache/solr/cloud/TestSolrCloudWithDelegationTokens.java
@@ -183,6 +183,7 @@ public class TestSolrCloudWithDelegationTokens extends SolrTestCaseJ4 {
         .build();
     else delegationTokenClient = new CloudSolrClient.Builder(Collections.singletonList(miniCluster.getZkServer().getZkAddress()), Optional.empty())
         .withLBHttpSolrClientBuilder(new LBHttpSolrClient.Builder()
+            .withSocketTimeout(30000).withConnectionTimeout(15000)
             .withResponseParser(client.getParser())
             .withHttpSolrClientBuilder(
                 new HttpSolrClient.Builder()

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/eb652b84/solr/core/src/test/org/apache/solr/cloud/TestSolrCloudWithKerberosAlt.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/TestSolrCloudWithKerberosAlt.java b/solr/core/src/test/org/apache/solr/cloud/TestSolrCloudWithKerberosAlt.java
index 4317736..9d56204 100644
--- a/solr/core/src/test/org/apache/solr/cloud/TestSolrCloudWithKerberosAlt.java
+++ b/solr/core/src/test/org/apache/solr/cloud/TestSolrCloudWithKerberosAlt.java
@@ -133,8 +133,7 @@ public class TestSolrCloudWithKerberosAlt extends SolrCloudTestCase {
         .setMaxShardsPerNode(maxShardsPerNode)
         .process(client);
 
-    AbstractDistribZkTestBase.waitForRecoveriesToFinish
-        (collectionName, client.getZkStateReader(), true, true, 330);
+    cluster.waitForActiveCollection(collectionName, numShards, numShards * numReplicas);
 
     // modify/query collection
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/eb652b84/solr/core/src/test/org/apache/solr/cloud/TestSolrCloudWithSecureImpersonation.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/TestSolrCloudWithSecureImpersonation.java b/solr/core/src/test/org/apache/solr/cloud/TestSolrCloudWithSecureImpersonation.java
index 184cd90..a82018f 100644
--- a/solr/core/src/test/org/apache/solr/cloud/TestSolrCloudWithSecureImpersonation.java
+++ b/solr/core/src/test/org/apache/solr/cloud/TestSolrCloudWithSecureImpersonation.java
@@ -32,7 +32,6 @@ import org.apache.solr.client.solrj.embedded.JettySolrRunner;
 import org.apache.solr.client.solrj.impl.HttpSolrClient;
 import org.apache.solr.client.solrj.request.CollectionAdminRequest;
 import org.apache.solr.client.solrj.response.CollectionAdminResponse;
-import org.apache.solr.common.cloud.ZkStateReader;
 import org.apache.solr.common.params.ModifiableSolrParams;
 import org.apache.solr.common.params.SolrParams;
 import org.apache.solr.core.CoreContainer;
@@ -184,11 +183,11 @@ public class TestSolrCloudWithSecureImpersonation extends SolrTestCaseJ4 {
     create.setMaxShardsPerNode(1);
     response = create.process(solrCluster.getSolrClient());
 
+    miniCluster.waitForActiveCollection(name, 1, 1);
+    
     if (response.getStatus() != 0 || response.getErrorMessages() != null) {
       fail("Could not create collection. Response" + response.toString());
     }
-    ZkStateReader zkStateReader = solrCluster.getSolrClient().getZkStateReader();
-    AbstractDistribZkTestBase.waitForRecoveriesToFinish(name, zkStateReader, false, true, 100);
   }
 
   private SolrRequest getProxyRequest(String user, String doAs) {

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/eb652b84/solr/core/src/test/org/apache/solr/cloud/TestStressCloudBlindAtomicUpdates.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/TestStressCloudBlindAtomicUpdates.java b/solr/core/src/test/org/apache/solr/cloud/TestStressCloudBlindAtomicUpdates.java
index ae1161d..366d578 100644
--- a/solr/core/src/test/org/apache/solr/cloud/TestStressCloudBlindAtomicUpdates.java
+++ b/solr/core/src/test/org/apache/solr/cloud/TestStressCloudBlindAtomicUpdates.java
@@ -51,6 +51,7 @@ import org.apache.solr.common.SolrInputField;
 import org.apache.solr.common.params.ModifiableSolrParams;
 import org.apache.solr.common.params.SolrParams;
 import org.apache.solr.common.util.ExecutorUtil;
+import org.apache.solr.common.util.IOUtils;
 import org.apache.solr.util.DefaultSolrThreadFactory;
 import org.apache.solr.util.TestInjection;
 import org.junit.AfterClass;
@@ -154,7 +155,8 @@ public class TestStressCloudBlindAtomicUpdates extends SolrCloudTestCase {
     TestInjection.reset();
     ExecutorUtil.shutdownAndAwaitTermination(EXEC_SERVICE);
     EXEC_SERVICE = null;
-    CLOUD_CLIENT.close(); CLOUD_CLIENT = null;
+    IOUtils.closeQuietly(CLOUD_CLIENT);
+    CLOUD_CLIENT = null;
     for (HttpSolrClient client : CLIENTS) {
       client.close();
     }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/eb652b84/solr/core/src/test/org/apache/solr/cloud/TestStressInPlaceUpdates.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/TestStressInPlaceUpdates.java b/solr/core/src/test/org/apache/solr/cloud/TestStressInPlaceUpdates.java
index 8189779..feeebaa 100644
--- a/solr/core/src/test/org/apache/solr/cloud/TestStressInPlaceUpdates.java
+++ b/solr/core/src/test/org/apache/solr/cloud/TestStressInPlaceUpdates.java
@@ -105,14 +105,14 @@ public class TestStressInPlaceUpdates extends AbstractFullDistribZkTestBase {
     final int deletePercent = 4 + random().nextInt(25);
     final int deleteByQueryPercent = random().nextInt(8);
     final int ndocs = atLeast(5);
-    int nWriteThreads = 5 + random().nextInt(25);
+    int nWriteThreads = 5 + random().nextInt(12);
     int fullUpdatePercent = 5 + random().nextInt(50);
 
     // query variables
     final int percentRealtimeQuery = 75;
     // number of cumulative read/write operations by all threads
-    final AtomicLong operations = new AtomicLong(25000);  
-    int nReadThreads = 5 + random().nextInt(25);
+    final AtomicLong operations = new AtomicLong(5000);  
+    int nReadThreads = 5 + random().nextInt(12);
 
 
     /** // testing
@@ -151,7 +151,7 @@ public class TestStressInPlaceUpdates extends AbstractFullDistribZkTestBase {
         public void run() {
           try {
             while (operations.decrementAndGet() > 0) {
-              int oper = rand.nextInt(100);
+              int oper = rand.nextInt(50);
 
               if (oper < commitPercent) {
                 Map<Integer, DocInfo> newCommittedModel;
@@ -245,7 +245,7 @@ public class TestStressInPlaceUpdates extends AbstractFullDistribZkTestBase {
                 int nextVal1 = val1;
                 long nextVal2 = val2;
 
-                int addOper = rand.nextInt(100);
+                int addOper = rand.nextInt(30);
                 Long returnedVersion;
                 if (addOper < fullUpdatePercent || info.version <= 0) { // if document was never indexed or was deleted
                   // FULL UPDATE

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/eb652b84/solr/core/src/test/org/apache/solr/cloud/TestStressLiveNodes.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/TestStressLiveNodes.java b/solr/core/src/test/org/apache/solr/cloud/TestStressLiveNodes.java
index 771ae0a..e20b921 100644
--- a/solr/core/src/test/org/apache/solr/cloud/TestStressLiveNodes.java
+++ b/solr/core/src/test/org/apache/solr/cloud/TestStressLiveNodes.java
@@ -66,9 +66,6 @@ public class TestStressLiveNodes extends SolrCloudTestCase {
     // we only need 1 node, and we don't care about any configs or collections
     // we're going to fake all the live_nodes changes we want to fake.
     configureCluster(1).configure();
-
-    // give all nodes a chance to come alive
-    TestTolerantUpdateProcessorCloud.assertSpinLoopAllJettyAreRunning(cluster);
     
     CLOUD_CLIENT = cluster.getSolrClient();
     CLOUD_CLIENT.connect(); // force connection even though we aren't sending any requests

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/eb652b84/solr/core/src/test/org/apache/solr/cloud/TestTlogReplica.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/TestTlogReplica.java b/solr/core/src/test/org/apache/solr/cloud/TestTlogReplica.java
index 8e66b1e..0318b1e 100644
--- a/solr/core/src/test/org/apache/solr/cloud/TestTlogReplica.java
+++ b/solr/core/src/test/org/apache/solr/cloud/TestTlogReplica.java
@@ -39,6 +39,7 @@ import org.apache.http.client.methods.HttpGet;
 import org.apache.http.client.methods.HttpPost;
 import org.apache.http.entity.StringEntity;
 import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.util.LuceneTestCase.AwaitsFix;
 import org.apache.lucene.util.LuceneTestCase.Slow;
 import org.apache.solr.client.solrj.SolrClient;
 import org.apache.solr.client.solrj.SolrQuery;
@@ -75,6 +76,7 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 @Slow
+@AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/SOLR-12313")
 public class TestTlogReplica extends SolrCloudTestCase {
   
   private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
@@ -83,7 +85,7 @@ public class TestTlogReplica extends SolrCloudTestCase {
   private final static int REPLICATION_TIMEOUT_SECS = 10;
   
   private String suggestedCollectionName() {
-    return (getTestClass().getSimpleName().replace("Test", "") + "_" + getTestName().split(" ")[0]).replaceAll("(.)(\\p{Upper})", "$1_$2").toLowerCase(Locale.ROOT);
+    return (getTestClass().getSimpleName().replace("Test", "") + "_" + getSaferTestName().split(" ")[0]).replaceAll("(.)(\\p{Upper})", "$1_$2").toLowerCase(Locale.ROOT);
   }
 
   @BeforeClass
@@ -116,7 +118,7 @@ public class TestTlogReplica extends SolrCloudTestCase {
     for (JettySolrRunner jetty:cluster.getJettySolrRunners()) {
       if (!jetty.isRunning()) {
         log.warn("Jetty {} not running, probably some bad test. Starting it", jetty.getLocalPort());
-        ChaosMonkey.start(jetty);
+        jetty.start();
       }
     }
     if (cluster.getSolrClient().getZkStateReader().getClusterState().getCollectionOrNull(collectionName) != null) {
@@ -156,6 +158,7 @@ public class TestTlogReplica extends SolrCloudTestCase {
           CollectionAdminRequest.createCollection(collectionName, "conf", 2, 0, 4, 0)
           .setMaxShardsPerNode(100)
           .process(cluster.getSolrClient());
+          cluster.waitForActiveCollection(collectionName, 2, 8);
           break;
         case 1:
           // Sometimes don't use SolrJ
@@ -168,6 +171,7 @@ public class TestTlogReplica extends SolrCloudTestCase {
           HttpGet createCollectionGet = new HttpGet(url);
           HttpResponse httpResponse = cluster.getSolrClient().getHttpClient().execute(createCollectionGet);
           assertEquals(200, httpResponse.getStatusLine().getStatusCode());
+          cluster.waitForActiveCollection(collectionName, 2, 8);
           break;
         case 2:
           // Sometimes use V2 API
@@ -182,6 +186,7 @@ public class TestTlogReplica extends SolrCloudTestCase {
           createCollectionPost.setEntity(new StringEntity(requestBody));
           httpResponse = cluster.getSolrClient().getHttpClient().execute(createCollectionPost);
           assertEquals(200, httpResponse.getStatusLine().getStatusCode());
+          cluster.waitForActiveCollection(collectionName, 2, 8);
           break;
       }
       
@@ -213,6 +218,7 @@ public class TestTlogReplica extends SolrCloudTestCase {
           CollectionAdminResponse response = CollectionAdminRequest.reloadCollection(collectionName)
           .process(cluster.getSolrClient());
           assertEquals(0, response.getStatus());
+          waitForState("failed waiting for active colletion", collectionName, clusterShape(2, 8));
           reloaded = true;
         }
       }
@@ -273,7 +279,7 @@ public class TestTlogReplica extends SolrCloudTestCase {
     addReplicaToShard("shard2", Replica.Type.TLOG);
     docCollection = assertNumberOfReplicas(0, 4, 0, true, false);
     
-    waitForState("Expecting collection to have 2 shards and 2 replica each", collectionName, clusterShape(2, 2));
+    waitForState("Expecting collection to have 2 shards and 2 replica each", collectionName, clusterShape(2, 4));
     
     //Delete tlog replica from shard1
     CollectionAdminRequest.deleteReplica(
@@ -395,7 +401,7 @@ public class TestTlogReplica extends SolrCloudTestCase {
       .process(cluster.getSolrClient());
     } else {
       leaderJetty = cluster.getReplicaJetty(s.getLeader());
-      ChaosMonkey.kill(leaderJetty);
+      leaderJetty.stop();
       waitForState("Leader replica not removed", collectionName, clusterShape(1, 1));
       // Wait for cluster state to be updated
       waitForState("Replica state not updated in cluster state", 
@@ -425,7 +431,7 @@ public class TestTlogReplica extends SolrCloudTestCase {
     if (removeReplica) {
       CollectionAdminRequest.addReplicaToShard(collectionName, "shard1", Replica.Type.TLOG).process(cluster.getSolrClient());
     } else {
-      ChaosMonkey.start(leaderJetty);
+      leaderJetty.start();
     }
     waitForState("Expected collection to be 1x2", collectionName, clusterShape(1, 2));
     // added replica should replicate from the leader
@@ -441,7 +447,7 @@ public class TestTlogReplica extends SolrCloudTestCase {
     waitForNumDocsInAllActiveReplicas(1);
     
     JettySolrRunner pullReplicaJetty = cluster.getReplicaJetty(docCollection.getSlice("shard1").getReplicas(EnumSet.of(Replica.Type.TLOG)).get(0));
-    ChaosMonkey.kill(pullReplicaJetty);
+    pullReplicaJetty.stop();
     waitForState("Replica not removed", collectionName, activeReplicaCount(0, 1, 0));
 //    // Also wait for the replica to be placed in state="down"
 //    waitForState("Didn't update state", collectionName, clusterStateReflectsActiveAndDownReplicas());
@@ -450,7 +456,7 @@ public class TestTlogReplica extends SolrCloudTestCase {
     cluster.getSolrClient().commit(collectionName);
     waitForNumDocsInAllActiveReplicas(2);
     
-    ChaosMonkey.start(pullReplicaJetty);
+    pullReplicaJetty.start();
     waitForState("Replica not added", collectionName, activeReplicaCount(0, 2, 0));
     waitForNumDocsInAllActiveReplicas(2);
   }
@@ -538,15 +544,15 @@ public class TestTlogReplica extends SolrCloudTestCase {
         .process(cloudClient, collectionName);
     JettySolrRunner solrRunner = getSolrRunner(false).get(0);
     if (useKill) { 
-      ChaosMonkey.kill(solrRunner);
+      solrRunner.stop();
     } else {
-      ChaosMonkey.stop(solrRunner);
+      solrRunner.stop();
     }
     waitForState("Replica still up", collectionName, activeReplicaCount(0,1,0));
     new UpdateRequest()
         .add(sdoc("id", "6"))
         .process(cloudClient, collectionName);
-    ChaosMonkey.start(solrRunner);
+    solrRunner.start();
     waitForState("Replica didn't recover", collectionName, activeReplicaCount(0,2,0));
     // We skip peerSync, so replica will always trigger commit on leader
     // We query only the non-leader replicas, since we haven't opened a new searcher on the leader yet
@@ -566,10 +572,10 @@ public class TestTlogReplica extends SolrCloudTestCase {
     }
     checkRTG(3,7, cluster.getJettySolrRunners());
     DirectUpdateHandler2.commitOnClose = false;
-    ChaosMonkey.stop(solrRunner);
+    solrRunner.stop();
     waitForState("Replica still up", collectionName, activeReplicaCount(0,1,0));
     DirectUpdateHandler2.commitOnClose = true;
-    ChaosMonkey.start(solrRunner);
+    solrRunner.start();
     waitForState("Replica didn't recover", collectionName, activeReplicaCount(0,2,0));
     waitForNumDocsInAllReplicas(5, getNonLeaderReplias(collectionName), 10); //timeout for stale collection state
     checkRTG(3,7, cluster.getJettySolrRunners());
@@ -588,11 +594,11 @@ public class TestTlogReplica extends SolrCloudTestCase {
       }
     };
     if (useKill) { 
-      ChaosMonkey.kill(solrRunner);
+      solrRunner.stop();
     } else {
-      ChaosMonkey.stop(solrRunner);
+      solrRunner.stop();
     }
-    ChaosMonkey.start(solrRunner);
+    solrRunner.start();
     waitingForReplay.acquire();
     // If I add the doc immediately, the leader fails to communicate with the follower with broken pipe.
     // Options are, wait or retry...
@@ -660,13 +666,13 @@ public class TestTlogReplica extends SolrCloudTestCase {
         .add(sdoc("id", "2"))
         .process(cloudClient, collectionName);
     JettySolrRunner oldLeaderJetty = getSolrRunner(true).get(0);
-    ChaosMonkey.kill(oldLeaderJetty);
+    oldLeaderJetty.stop();
     waitForState("Replica not removed", collectionName, activeReplicaCount(0, 1, 0));
     new UpdateRequest()
         .add(sdoc("id", "3"))
         .add(sdoc("id", "4"))
         .process(cloudClient, collectionName);
-    ChaosMonkey.start(oldLeaderJetty);
+    oldLeaderJetty.start();
     waitForState("Replica not added", collectionName, activeReplicaCount(0, 2, 0));
     checkRTG(1,4, cluster.getJettySolrRunners());
     new UpdateRequest()
@@ -692,7 +698,7 @@ public class TestTlogReplica extends SolrCloudTestCase {
     }
     JettySolrRunner oldLeaderJetty = getSolrRunner(true).get(0);
     String oldLeaderNodeName = oldLeaderJetty.getNodeName();
-    ChaosMonkey.kill(oldLeaderJetty);
+    oldLeaderJetty.stop();
     waitForState("Replica not removed", collectionName, activeReplicaCount(0, 1, 0));
     waitForState("Expect new leader", collectionName,
         (liveNodes, collectionState) -> {
@@ -701,7 +707,7 @@ public class TestTlogReplica extends SolrCloudTestCase {
           return !leader.getNodeName().equals(oldLeaderNodeName);
         }
     );
-    ChaosMonkey.start(oldLeaderJetty);
+    oldLeaderJetty.start();
     waitForState("Replica not added", collectionName, activeReplicaCount(0, 2, 0));
     checkRTG(1,1, cluster.getJettySolrRunners());
     SolrDocument doc = cluster.getSolrClient().getById(collectionName,"1");
@@ -748,7 +754,7 @@ public class TestTlogReplica extends SolrCloudTestCase {
     .process(cluster.getSolrClient());
     int numReplicasPerShard = numNrtReplicas + numTlogReplicas + numPullReplicas;
     waitForState("Expected collection to be created with " + numShards + " shards and  " + numReplicasPerShard + " replicas",
-        collectionName, clusterShape(numShards, numReplicasPerShard));
+        collectionName, clusterShape(numShards, numShards * numReplicasPerShard));
     return assertNumberOfReplicas(numNrtReplicas*numShards, numTlogReplicas*numShards, numPullReplicas*numShards, false, true);
   }
   

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/eb652b84/solr/core/src/test/org/apache/solr/cloud/TestTolerantUpdateProcessorCloud.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/TestTolerantUpdateProcessorCloud.java b/solr/core/src/test/org/apache/solr/cloud/TestTolerantUpdateProcessorCloud.java
index 8a84724..d89e1c5 100644
--- a/solr/core/src/test/org/apache/solr/cloud/TestTolerantUpdateProcessorCloud.java
+++ b/solr/core/src/test/org/apache/solr/cloud/TestTolerantUpdateProcessorCloud.java
@@ -110,7 +110,6 @@ public class TestTolerantUpdateProcessorCloud extends SolrCloudTestCase {
     configureCluster(NUM_SERVERS)
       .addConfig(configName, configDir.toPath())
       .configure();
-    assertSpinLoopAllJettyAreRunning(cluster);
 
     CLOUD_CLIENT = cluster.getSolrClient();
     CLOUD_CLIENT.setDefaultCollection(COLLECTION_NAME);
@@ -120,10 +119,9 @@ public class TestTolerantUpdateProcessorCloud extends SolrCloudTestCase {
         .withProperty("schema", "schema15.xml") // string id for doc routing prefix
         .process(CLOUD_CLIENT);
     
-    ZkStateReader zkStateReader = CLOUD_CLIENT.getZkStateReader();
-    AbstractDistribZkTestBase.waitForRecoveriesToFinish(COLLECTION_NAME, zkStateReader, true, true, 330);
-
+    cluster.waitForActiveCollection(COLLECTION_NAME, NUM_SHARDS, REPLICATION_FACTOR * NUM_SHARDS);
 
+    ZkStateReader zkStateReader = CLOUD_CLIENT.getZkStateReader();
     // really hackish way to get a URL for specific nodes based on shard/replica hosting
     // inspired by TestMiniSolrCloudCluster
     HashMap<String, String> urlMap = new HashMap<>();
@@ -922,40 +920,6 @@ public class TestTolerantUpdateProcessorCloud extends SolrCloudTestCase {
     assertQueryDocIds(client, false, docId21, docId22);
                       
   }
-
-  /**
-   * HACK: Loops over every Jetty instance in the specified MiniSolrCloudCluster to see if they are running,
-   * and sleeps small increments until they all report that they are, or a max num iters is reached
-   * 
-   * (work around for SOLR-8862.  Maybe something like this should be promoted into MiniSolrCloudCluster's 
-   * start() method? or SolrCloudTestCase's configureCluster?)
-   */
-  public static void assertSpinLoopAllJettyAreRunning(MiniSolrCloudCluster cluster) throws InterruptedException {
-    // NOTE: idealy we could use an ExecutorService that tried to open Sockets (with a long timeout)
-    // to each of the jetty instances in parallel w/o any sleeping -- but since they pick their ports
-    // dynamically and don't report them until/unless the server is up, that won't neccessarily do us
-    // any good.
-    final int numServers = cluster.getJettySolrRunners().size();
-    int numRunning = 0;
-    for (int i = 5; 0 <= i; i--) {
-      numRunning = 0;
-      for (JettySolrRunner jetty : cluster.getJettySolrRunners()) {
-        if (jetty.isRunning()) {
-          numRunning++;
-        }
-      }
-      if (numServers == numRunning) {
-        return;
-      } else if (0 == i) {
-        // give up
-        break;
-      }
-      // the more nodes we're waiting on, the longer we should try to sleep (within reason)
-      Thread.sleep(Math.min((numServers - numRunning) * 100, 1000));
-    }
-    assertEquals("giving up waiting for all jetty instances to be running",
-                 numServers, numRunning);
-  }
   
   /** Asserts that the UpdateResponse contains the specified expectedErrs and no others */
   public static void assertUpdateTolerantErrors(String assertionMsgPrefix,

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/eb652b84/solr/core/src/test/org/apache/solr/cloud/TestTolerantUpdateProcessorRandomCloud.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/TestTolerantUpdateProcessorRandomCloud.java b/solr/core/src/test/org/apache/solr/cloud/TestTolerantUpdateProcessorRandomCloud.java
index c60c22b..ef07a77 100644
--- a/solr/core/src/test/org/apache/solr/cloud/TestTolerantUpdateProcessorRandomCloud.java
+++ b/solr/core/src/test/org/apache/solr/cloud/TestTolerantUpdateProcessorRandomCloud.java
@@ -41,7 +41,6 @@ import org.apache.solr.cloud.TestTolerantUpdateProcessorCloud.ExpectedErr;
 import org.apache.solr.common.SolrDocument;
 import org.apache.solr.common.SolrInputDocument;
 import org.apache.solr.common.SolrInputField;
-import org.apache.solr.common.cloud.ZkStateReader;
 import org.apache.solr.common.params.SolrParams;
 import org.junit.AfterClass;
 import org.junit.Before;
@@ -96,8 +95,6 @@ public class TestTolerantUpdateProcessorRandomCloud extends SolrCloudTestCase {
     configureCluster(numServers)
       .addConfig(configName, configDir.toPath())
       .configure();
-
-    TestTolerantUpdateProcessorCloud.assertSpinLoopAllJettyAreRunning(cluster);
     
     Map<String, String> collectionProperties = new HashMap<>();
     collectionProperties.put("config", "solrconfig-distrib-update-processor-chains.xml");
@@ -110,6 +107,8 @@ public class TestTolerantUpdateProcessorRandomCloud extends SolrCloudTestCase {
         .setProperties(collectionProperties)
         .process(CLOUD_CLIENT);
 
+    cluster.waitForActiveCollection(COLLECTION_NAME, numShards, numShards * repFactor);
+    
     if (NODE_CLIENTS != null) {
       for (HttpSolrClient client : NODE_CLIENTS) {
         client.close();
@@ -123,9 +122,6 @@ public class TestTolerantUpdateProcessorRandomCloud extends SolrCloudTestCase {
     }
     assertEquals(numServers, NODE_CLIENTS.size());
     
-    ZkStateReader zkStateReader = CLOUD_CLIENT.getZkStateReader();
-    AbstractDistribZkTestBase.waitForRecoveriesToFinish(COLLECTION_NAME, zkStateReader, true, true, 330);
-    
   }
   
   @Before

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/eb652b84/solr/core/src/test/org/apache/solr/cloud/TestUtilizeNode.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/TestUtilizeNode.java b/solr/core/src/test/org/apache/solr/cloud/TestUtilizeNode.java
index 18ac662..5a28211 100644
--- a/solr/core/src/test/org/apache/solr/cloud/TestUtilizeNode.java
+++ b/solr/core/src/test/org/apache/solr/cloud/TestUtilizeNode.java
@@ -73,7 +73,6 @@ public class TestUtilizeNode extends SolrCloudTestCase {
 
   @Test
   public void test() throws Exception {
-    cluster.waitForAllNodes(5000);
     int REPLICATION = 2;
     String coll = "utilizenodecoll";
     CloudSolrClient cloudClient = cluster.getSolrClient();