You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by da...@apache.org on 2018/12/01 17:10:00 UTC

[12/32] lucene-solr:jira/http2: SOLR-12801: Make massive improvements to the tests.

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/75b18319/solr/core/src/test/org/apache/solr/TestDistributedSearch.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/TestDistributedSearch.java b/solr/core/src/test/org/apache/solr/TestDistributedSearch.java
index 3092d6f..5c29e8b 100644
--- a/solr/core/src/test/org/apache/solr/TestDistributedSearch.java
+++ b/solr/core/src/test/org/apache/solr/TestDistributedSearch.java
@@ -22,9 +22,14 @@ import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.EnumSet;
+import java.util.HashSet;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.Callable;
+import java.util.concurrent.ExecutorCompletionService;
+import java.util.concurrent.Future;
 
 import org.apache.commons.lang.StringUtils;
 import org.apache.lucene.util.LuceneTestCase.Slow;
@@ -38,16 +43,15 @@ import org.apache.solr.client.solrj.response.FacetField;
 import org.apache.solr.client.solrj.response.FieldStatsInfo;
 import org.apache.solr.client.solrj.response.QueryResponse;
 import org.apache.solr.client.solrj.response.RangeFacet;
-import org.apache.solr.cloud.ChaosMonkey;
 import org.apache.solr.common.EnumFieldValue;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.SolrException.ErrorCode;
 import org.apache.solr.common.params.CommonParams;
+import org.apache.solr.common.params.FacetParams.FacetRangeMethod;
 import org.apache.solr.common.params.ModifiableSolrParams;
 import org.apache.solr.common.params.ShardParams;
 import org.apache.solr.common.params.SolrParams;
 import org.apache.solr.common.params.StatsParams;
-import org.apache.solr.common.params.FacetParams.FacetRangeMethod;
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.handler.component.ShardResponse;
 import org.apache.solr.handler.component.StatsComponentTest.StatSetCombinations;
@@ -100,6 +104,11 @@ public class TestDistributedSearch extends BaseDistributedSearchTestCase {
     // we validate the connection before use on the restarted
     // server so that we don't use a bad one
     System.setProperty("validateAfterInactivity", "200");
+    
+    System.setProperty("solr.httpclient.retries", "0");
+    System.setProperty("distribUpdateSoTimeout", "5000");
+    
+
   }
 
   public TestDistributedSearch() {
@@ -109,6 +118,9 @@ public class TestDistributedSearch extends BaseDistributedSearchTestCase {
   
   @Test
   public void test() throws Exception {
+    
+    assertEquals(clients.size(), jettys.size());
+    
     QueryResponse rsp = null;
     int backupStress = stress; // make a copy so we can restore
 
@@ -952,74 +964,81 @@ public class TestDistributedSearch extends BaseDistributedSearchTestCase {
     assertEquals("should have an entry for each shard ["+sinfo+"] "+shards, cnt, sinfo.size());
 
     // test shards.tolerant=true
-    for(int numDownServers = 0; numDownServers < jettys.size()-1; numDownServers++)
-    {
-      List<JettySolrRunner> upJettys = new ArrayList<>(jettys);
-      List<SolrClient> upClients = new ArrayList<>(clients);
-      List<JettySolrRunner> downJettys = new ArrayList<>();
-      List<String> upShards = new ArrayList<>(Arrays.asList(shardsArr));
-      for(int i=0; i<numDownServers; i++)
-      {
-        // shut down some of the jettys
-        int indexToRemove = r.nextInt(upJettys.size());
-        JettySolrRunner downJetty = upJettys.remove(indexToRemove);
-        upClients.remove(indexToRemove);
-        upShards.remove(indexToRemove);
-        ChaosMonkey.stop(downJetty);
-        downJettys.add(downJetty);
-      }
 
-      queryPartialResults(upShards, upClients, 
-          "q","*:*",
-          "facet","true", 
-          "facet.field",t1,
-          "facet.field",t1,
-          "facet.limit",5,
-          ShardParams.SHARDS_INFO,"true",
-          ShardParams.SHARDS_TOLERANT,"true");
-
-      queryPartialResults(upShards, upClients,
-          "q", "*:*",
-          "facet", "true",
-          "facet.query", i1 + ":[1 TO 50]",
-          "facet.query", i1 + ":[1 TO 50]",
-          ShardParams.SHARDS_INFO, "true",
-          ShardParams.SHARDS_TOLERANT, "true");
-
-      // test group query
-      queryPartialResults(upShards, upClients,
-           "q", "*:*",
-           "rows", 100,
-           "fl", "id," + i1,
-           "group", "true",
-           "group.query", t1 + ":kings OR " + t1 + ":eggs",
-           "group.limit", 10,
-           "sort", i1 + " asc, id asc",
-           CommonParams.TIME_ALLOWED, 1,
-           ShardParams.SHARDS_INFO, "true",
-           ShardParams.SHARDS_TOLERANT, "true");
-
-      queryPartialResults(upShards, upClients,
-          "q", "*:*",
-          "stats", "true",
-          "stats.field", i1,
-          ShardParams.SHARDS_INFO, "true",
-          ShardParams.SHARDS_TOLERANT, "true");
-
-      queryPartialResults(upShards, upClients,
-          "q", "toyata",
-          "spellcheck", "true",
-          "spellcheck.q", "toyata",
-          "qt", "/spellCheckCompRH_Direct",
-          "shards.qt", "/spellCheckCompRH_Direct",
-          ShardParams.SHARDS_INFO, "true",
-          ShardParams.SHARDS_TOLERANT, "true");
-
-      // restart the jettys
-      for (JettySolrRunner downJetty : downJettys) {
-        ChaosMonkey.start(downJetty);
+    List<JettySolrRunner> upJettys = Collections.synchronizedList(new ArrayList<>(jettys));
+    List<SolrClient> upClients = Collections.synchronizedList(new ArrayList<>(clients));
+    List<JettySolrRunner> downJettys = Collections.synchronizedList(new ArrayList<>());
+    List<String> upShards = Collections.synchronizedList(new ArrayList<>(Arrays.asList(shardsArr)));
+    
+    int cap =  Math.max(upJettys.size() - 1, 1);
+
+    int numDownServers = random().nextInt(cap);
+    for (int i = 0; i < numDownServers; i++) {
+      if (upJettys.size() == 1) {
+        continue;
       }
+      // shut down some of the jettys
+      int indexToRemove = r.nextInt(upJettys.size() - 1);
+      JettySolrRunner downJetty = upJettys.remove(indexToRemove);
+      upClients.remove(indexToRemove);
+      upShards.remove(indexToRemove);
+      downJetty.stop();
+      downJettys.add(downJetty);
+    }
+    
+    Thread.sleep(100);
+
+    queryPartialResults(upShards, upClients,
+        "q", "*:*",
+        "facet", "true",
+        "facet.field", t1,
+        "facet.field", t1,
+        "facet.limit", 5,
+        ShardParams.SHARDS_INFO, "true",
+        ShardParams.SHARDS_TOLERANT, "true");
+
+    queryPartialResults(upShards, upClients,
+        "q", "*:*",
+        "facet", "true",
+        "facet.query", i1 + ":[1 TO 50]",
+        "facet.query", i1 + ":[1 TO 50]",
+        ShardParams.SHARDS_INFO, "true",
+        ShardParams.SHARDS_TOLERANT, "true");
+
+    // test group query
+    queryPartialResults(upShards, upClients,
+        "q", "*:*",
+        "rows", 100,
+        "fl", "id," + i1,
+        "group", "true",
+        "group.query", t1 + ":kings OR " + t1 + ":eggs",
+        "group.limit", 10,
+        "sort", i1 + " asc, id asc",
+        CommonParams.TIME_ALLOWED, 10000,
+        ShardParams.SHARDS_INFO, "true",
+        ShardParams.SHARDS_TOLERANT, "true");
+
+    queryPartialResults(upShards, upClients,
+        "q", "*:*",
+        "stats", "true",
+        "stats.field", i1,
+        ShardParams.SHARDS_INFO, "true",
+        ShardParams.SHARDS_TOLERANT, "true");
+
+    queryPartialResults(upShards, upClients,
+        "q", "toyata",
+        "spellcheck", "true",
+        "spellcheck.q", "toyata",
+        "qt", "/spellCheckCompRH_Direct",
+        "shards.qt", "/spellCheckCompRH_Direct",
+        ShardParams.SHARDS_INFO, "true",
+        ShardParams.SHARDS_TOLERANT, "true");
+
+    // restart the jettys
+    for (JettySolrRunner downJetty : downJettys) {
+      downJetty.start();
     }
+    
 
     // This index has the same number for every field
     
@@ -1125,17 +1144,22 @@ public class TestDistributedSearch extends BaseDistributedSearchTestCase {
     params.remove("distrib");
     setDistributedParams(params);
 
-    QueryResponse rsp = queryRandomUpServer(params,upClients);
+    if (upClients.size() == 0) {
+      return;
+    }
+    QueryResponse rsp = queryRandomUpServer(params, upClients);
 
     comparePartialResponses(rsp, controlRsp, upShards);
 
     if (stress > 0) {
       log.info("starting stress...");
-      Thread[] threads = new Thread[nThreads];
+      Set<Future<Object>> pending = new HashSet<>();;
+      ExecutorCompletionService<Object> cs = new ExecutorCompletionService<>(executor);
+      Callable[] threads = new Callable[nThreads];
       for (int i = 0; i < threads.length; i++) {
-        threads[i] = new Thread() {
+        threads[i] = new Callable() {
           @Override
-          public void run() {
+          public Object call() {
             for (int j = 0; j < stress; j++) {
               int which = r.nextInt(upClients.size());
               SolrClient client = upClients.get(which);
@@ -1148,21 +1172,32 @@ public class TestDistributedSearch extends BaseDistributedSearchTestCase {
                 throw new RuntimeException(e);
               }
             }
+            return null;
           }
         };
-        threads[i].start();
+        pending.add(cs.submit(threads[i]));
       }
-
-      for (Thread thread : threads) {
-        thread.join();
+      
+      while (pending.size() > 0) {
+        Future<Object> future = cs.take();
+        pending.remove(future);
+        future.get();
       }
+
     }
   }
 
-  protected QueryResponse queryRandomUpServer(ModifiableSolrParams params, List<SolrClient> upClients) throws SolrServerException, IOException {
+  protected QueryResponse queryRandomUpServer(ModifiableSolrParams params, List<SolrClient> upClients)
+      throws SolrServerException, IOException {
     // query a random "up" server
-    int which = r.nextInt(upClients.size());
-    SolrClient client = upClients.get(which);
+    SolrClient client;
+    if (upClients.size() == 1) {
+      client = upClients.get(0);
+    } else {
+      int which = r.nextInt(upClients.size() - 1);
+      client = upClients.get(which);
+    }
+
     QueryResponse rsp = client.query(params);
     return rsp;
   }
@@ -1195,7 +1230,7 @@ public class TestDistributedSearch extends BaseDistributedSearchTestCase {
               assertTrue("Expected timeAllowedError or to find shardAddress in the up shard info: " + info.toString(), info.get("shardAddress") != null);
             }
           } else {
-            assertEquals("Expected to find the " + SolrQueryResponse.RESPONSE_HEADER_PARTIAL_RESULTS_KEY + " header set if a shard is down",
+            assertEquals("Expected to find the " + SolrQueryResponse.RESPONSE_HEADER_PARTIAL_RESULTS_KEY + " header set if a shard is down. Response: " + rsp,
                 Boolean.TRUE, rsp.getHeader().get(SolrQueryResponse.RESPONSE_HEADER_PARTIAL_RESULTS_KEY));
             assertTrue("Expected to find error in the down shard info: " + info.toString(), info.get("error") != null);
           }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/75b18319/solr/core/src/test/org/apache/solr/TestHighlightDedupGrouping.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/TestHighlightDedupGrouping.java b/solr/core/src/test/org/apache/solr/TestHighlightDedupGrouping.java
index d3f3796..1b707a5 100644
--- a/solr/core/src/test/org/apache/solr/TestHighlightDedupGrouping.java
+++ b/solr/core/src/test/org/apache/solr/TestHighlightDedupGrouping.java
@@ -16,14 +16,16 @@
  */
 package org.apache.solr;
 
+import java.io.IOException;
+
+import org.apache.lucene.search.TimeLimitingCollector;
 import org.apache.lucene.util.TestUtil;
 import org.apache.solr.client.solrj.SolrServerException;
 import org.apache.solr.client.solrj.response.QueryResponse;
 import org.apache.solr.common.SolrInputDocument;
+import org.junit.AfterClass;
 import org.junit.Test;
 
-import java.io.IOException;
-
 /**
  * Tests that highlighting doesn't break on grouped documents
  * with duplicate unique key fields stored on multiple shards.
@@ -34,6 +36,12 @@ public class TestHighlightDedupGrouping extends BaseDistributedSearchTestCase {
   private static final String group_ti1 = "group_ti1";
   private static final String shard_i1 = "shard_i1";
 
+  @AfterClass
+  public static void afterClass() throws Exception {
+    TimeLimitingCollector.getGlobalTimerThread().stopTimer();
+    TimeLimitingCollector.getGlobalTimerThread().join();
+  }
+  
   @Test
   @ShardsFixed(num = 2)
   public void test() throws Exception {

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/75b18319/solr/core/src/test/org/apache/solr/TestTolerantSearch.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/TestTolerantSearch.java b/solr/core/src/test/org/apache/solr/TestTolerantSearch.java
index 61a11f0..86d50a7 100644
--- a/solr/core/src/test/org/apache/solr/TestTolerantSearch.java
+++ b/solr/core/src/test/org/apache/solr/TestTolerantSearch.java
@@ -57,7 +57,7 @@ public class TestTolerantSearch extends SolrJettyTestBase {
   @BeforeClass
   public static void createThings() throws Exception {
     solrHome = createSolrHome();
-    createJetty(solrHome.getAbsolutePath());
+    createAndStartJetty(solrHome.getAbsolutePath());
     String url = jetty.getBaseUrl().toString();
     collection1 = getHttpSolrClient(url + "/collection1");
     collection2 = getHttpSolrClient(url + "/collection2");

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/75b18319/solr/core/src/test/org/apache/solr/cloud/AddReplicaTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/AddReplicaTest.java b/solr/core/src/test/org/apache/solr/cloud/AddReplicaTest.java
index 8980ba8..3bfda38 100644
--- a/solr/core/src/test/org/apache/solr/cloud/AddReplicaTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/AddReplicaTest.java
@@ -16,6 +16,9 @@
  */
 package org.apache.solr.cloud;
 
+import static org.apache.solr.client.solrj.response.RequestStatusState.COMPLETED;
+import static org.apache.solr.client.solrj.response.RequestStatusState.FAILED;
+
 import java.lang.invoke.MethodHandles;
 import java.util.Collection;
 import java.util.EnumSet;
@@ -27,26 +30,21 @@ import org.apache.solr.client.solrj.response.RequestStatusState;
 import org.apache.solr.common.cloud.ClusterState;
 import org.apache.solr.common.cloud.DocCollection;
 import org.apache.solr.common.cloud.Replica;
-import org.apache.solr.util.LogLevel;
 import org.junit.Before;
 import org.junit.BeforeClass;
 import org.junit.Test;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import static org.apache.solr.client.solrj.response.RequestStatusState.COMPLETED;
-import static org.apache.solr.client.solrj.response.RequestStatusState.FAILED;
-
 /**
  *
  */
-@LogLevel("org.apache.solr.cloud=DEBUG;org.apache.solr.cloud.Overseer=DEBUG;org.apache.solr.cloud.overseer=DEBUG;")
 public class AddReplicaTest extends SolrCloudTestCase {
   private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
 
   @BeforeClass
   public static void setupCluster() throws Exception {
-    configureCluster(4)
+    configureCluster(3)
         .addConfig("conf1", TEST_PATH().resolve("configsets").resolve("cloud-minimal").resolve("conf"))
         .configure();
   }
@@ -59,13 +57,14 @@ public class AddReplicaTest extends SolrCloudTestCase {
 
   @Test
   public void testAddMultipleReplicas() throws Exception  {
-    cluster.waitForAllNodes(5);
+
     String collection = "testAddMultipleReplicas";
     CloudSolrClient cloudClient = cluster.getSolrClient();
 
     CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection(collection, "conf1", 1, 1);
     create.setMaxShardsPerNode(2);
     cloudClient.request(create);
+    cluster.waitForActiveCollection(collection, 1, 1);
 
     CollectionAdminRequest.AddReplica addReplica = CollectionAdminRequest.addReplicaToShard(collection, "shard1")
         .setNrtReplicas(1)
@@ -73,6 +72,9 @@ public class AddReplicaTest extends SolrCloudTestCase {
         .setPullReplicas(1);
     RequestStatusState status = addReplica.processAndWait(collection + "_xyz1", cloudClient, 120);
     assertEquals(COMPLETED, status);
+    
+    cluster.waitForActiveCollection(collection, 1, 4);
+    
     DocCollection docCollection = cloudClient.getZkStateReader().getClusterState().getCollectionOrNull(collection);
     assertNotNull(docCollection);
     assertEquals(4, docCollection.getReplicas().size());
@@ -110,6 +112,7 @@ public class AddReplicaTest extends SolrCloudTestCase {
         .setCreateNodeSet(String.join(",", createNodeSet));
     status = addReplica.processAndWait(collection + "_xyz1", cloudClient, 120);
     assertEquals(COMPLETED, status);
+    waitForState("Timedout wait for collection to be created", collection, clusterShape(1, 9));
     docCollection = cloudClient.getZkStateReader().getClusterState().getCollectionOrNull(collection);
     assertNotNull(docCollection);
     // sanity check that everything is as before
@@ -120,9 +123,8 @@ public class AddReplicaTest extends SolrCloudTestCase {
   }
 
   @Test
-  //commented 2-Aug-2018 @BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 09-Apr-2018
   public void test() throws Exception {
-    cluster.waitForAllNodes(5);
+    
     String collection = "addreplicatest_coll";
 
     CloudSolrClient cloudClient = cluster.getSolrClient();
@@ -130,6 +132,8 @@ public class AddReplicaTest extends SolrCloudTestCase {
     CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection(collection, "conf1", 2, 1);
     create.setMaxShardsPerNode(2);
     cloudClient.request(create);
+    
+    cluster.waitForActiveCollection(collection, 2, 2);
 
     ClusterState clusterState = cloudClient.getZkStateReader().getClusterState();
     DocCollection coll = clusterState.getCollection(collection);
@@ -140,6 +144,7 @@ public class AddReplicaTest extends SolrCloudTestCase {
     CollectionAdminRequest.RequestStatus requestStatus = CollectionAdminRequest.requestStatus("000");
     CollectionAdminRequest.RequestStatusResponse rsp = requestStatus.process(cloudClient);
     assertNotSame(rsp.getRequestStatus(), COMPLETED);
+    
     // wait for async request success
     boolean success = false;
     for (int i = 0; i < 200; i++) {
@@ -152,11 +157,10 @@ public class AddReplicaTest extends SolrCloudTestCase {
       Thread.sleep(500);
     }
     assertTrue(success);
+    
     Collection<Replica> replicas2 = cloudClient.getZkStateReader().getClusterState().getCollection(collection).getSlice(sliceName).getReplicas();
     replicas2.removeAll(replicas);
     assertEquals(1, replicas2.size());
-    Replica r = replicas2.iterator().next();
-    assertNotSame(r.toString(), r.getState(), Replica.State.ACTIVE);
 
     // use waitForFinalState
     addReplica.setWaitForFinalState(true);

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/75b18319/solr/core/src/test/org/apache/solr/cloud/AliasIntegrationTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/AliasIntegrationTest.java b/solr/core/src/test/org/apache/solr/cloud/AliasIntegrationTest.java
index 1af1adf..47a8a99 100644
--- a/solr/core/src/test/org/apache/solr/cloud/AliasIntegrationTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/AliasIntegrationTest.java
@@ -90,7 +90,11 @@ public class AliasIntegrationTest extends SolrCloudTestCase {
   public void testProperties() throws Exception {
     CollectionAdminRequest.createCollection("collection1meta", "conf", 2, 1).process(cluster.getSolrClient());
     CollectionAdminRequest.createCollection("collection2meta", "conf", 1, 1).process(cluster.getSolrClient());
-    waitForState("Expected collection1 to be created with 2 shards and 1 replica", "collection1meta", clusterShape(2, 1));
+    
+    cluster.waitForActiveCollection("collection1meta", 2, 2);
+    cluster.waitForActiveCollection("collection2meta", 1, 1);
+    
+    waitForState("Expected collection1 to be created with 2 shards and 1 replica", "collection1meta", clusterShape(2, 2));
     waitForState("Expected collection2 to be created with 1 shard and 1 replica", "collection2meta", clusterShape(1, 1));
     ZkStateReader zkStateReader = cluster.getSolrClient().getZkStateReader();
     zkStateReader.createClusterStateWatchersAndUpdate();
@@ -204,7 +208,7 @@ public class AliasIntegrationTest extends SolrCloudTestCase {
 
   @Test
   public void testModifyPropertiesV2() throws Exception {
-    final String aliasName = getTestName();
+    final String aliasName = getSaferTestName();
     ZkStateReader zkStateReader = createColectionsAndAlias(aliasName);
     final String baseUrl = cluster.getRandomJetty(random()).getBaseUrl().toString();
     //TODO fix Solr test infra so that this /____v2/ becomes /api/
@@ -226,7 +230,7 @@ public class AliasIntegrationTest extends SolrCloudTestCase {
   @Test
   public void testModifyPropertiesV1() throws Exception {
     // note we don't use TZ in this test, thus it's UTC
-    final String aliasName = getTestName();
+    final String aliasName = getSaferTestName();
     ZkStateReader zkStateReader = createColectionsAndAlias(aliasName);
     final String baseUrl = cluster.getRandomJetty(random()).getBaseUrl().toString();
     HttpGet get = new HttpGet(baseUrl + "/admin/collections?action=ALIASPROP" +
@@ -241,7 +245,7 @@ public class AliasIntegrationTest extends SolrCloudTestCase {
   @Test
   public void testModifyPropertiesCAR() throws Exception {
     // note we don't use TZ in this test, thus it's UTC
-    final String aliasName = getTestName();
+    final String aliasName = getSaferTestName();
     ZkStateReader zkStateReader = createColectionsAndAlias(aliasName);
     CollectionAdminRequest.SetAliasProperty setAliasProperty = CollectionAdminRequest.setAliasProperty(aliasName);
     setAliasProperty.addProperty("foo","baz");
@@ -278,7 +282,11 @@ public class AliasIntegrationTest extends SolrCloudTestCase {
   private ZkStateReader createColectionsAndAlias(String aliasName) throws SolrServerException, IOException, KeeperException, InterruptedException {
     CollectionAdminRequest.createCollection("collection1meta", "conf", 2, 1).process(cluster.getSolrClient());
     CollectionAdminRequest.createCollection("collection2meta", "conf", 1, 1).process(cluster.getSolrClient());
-    waitForState("Expected collection1 to be created with 2 shards and 1 replica", "collection1meta", clusterShape(2, 1));
+    
+    cluster.waitForActiveCollection("collection1meta", 2, 2);
+    cluster.waitForActiveCollection("collection2meta", 1, 1);
+    
+    waitForState("Expected collection1 to be created with 2 shards and 1 replica", "collection1meta", clusterShape(2, 2));
     waitForState("Expected collection2 to be created with 1 shard and 1 replica", "collection2meta", clusterShape(1, 1));
     ZkStateReader zkStateReader = cluster.getSolrClient().getZkStateReader();
     zkStateReader.createClusterStateWatchersAndUpdate();
@@ -326,7 +334,11 @@ public class AliasIntegrationTest extends SolrCloudTestCase {
   public void testDeleteAliasWithExistingCollectionName() throws Exception {
     CollectionAdminRequest.createCollection("collection_old", "conf", 2, 1).process(cluster.getSolrClient());
     CollectionAdminRequest.createCollection("collection_new", "conf", 1, 1).process(cluster.getSolrClient());
-    waitForState("Expected collection_old to be created with 2 shards and 1 replica", "collection_old", clusterShape(2, 1));
+    
+    cluster.waitForActiveCollection("collection_old", 2, 2);
+    cluster.waitForActiveCollection("collection_new", 1, 1);
+    
+    waitForState("Expected collection_old to be created with 2 shards and 1 replica", "collection_old", clusterShape(2, 2));
     waitForState("Expected collection_new to be created with 1 shard and 1 replica", "collection_new", clusterShape(1, 1));
 
     new UpdateRequest()
@@ -399,7 +411,11 @@ public class AliasIntegrationTest extends SolrCloudTestCase {
   public void testDeleteOneOfTwoCollectionsAliased() throws Exception {
     CollectionAdminRequest.createCollection("collection_one", "conf", 2, 1).process(cluster.getSolrClient());
     CollectionAdminRequest.createCollection("collection_two", "conf", 1, 1).process(cluster.getSolrClient());
-    waitForState("Expected collection_one to be created with 2 shards and 1 replica", "collection_one", clusterShape(2, 1));
+    
+    cluster.waitForActiveCollection("collection_one", 2, 2);
+    cluster.waitForActiveCollection("collection_two", 1, 1);
+    
+    waitForState("Expected collection_one to be created with 2 shards and 1 replica", "collection_one", clusterShape(2, 2));
     waitForState("Expected collection_two to be created with 1 shard and 1 replica", "collection_two", clusterShape(1, 1));
 
     new UpdateRequest()
@@ -439,8 +455,9 @@ public class AliasIntegrationTest extends SolrCloudTestCase {
     // was deleted (and, assuming that it only points to collection_old).
     try {
       cluster.getSolrClient().query("collection_one", new SolrQuery("*:*"));
-    } catch (SolrServerException se) {
-      assertTrue(se.getMessage().contains("No live SolrServers"));
+      fail("should have failed");
+    } catch (SolrServerException | SolrException se) {
+ 
     }
 
     // Clean up
@@ -464,7 +481,11 @@ public class AliasIntegrationTest extends SolrCloudTestCase {
   public void test() throws Exception {
     CollectionAdminRequest.createCollection("collection1", "conf", 2, 1).process(cluster.getSolrClient());
     CollectionAdminRequest.createCollection("collection2", "conf", 1, 1).process(cluster.getSolrClient());
-    waitForState("Expected collection1 to be created with 2 shards and 1 replica", "collection1", clusterShape(2, 1));
+    
+    cluster.waitForActiveCollection("collection1", 2, 2);
+    cluster.waitForActiveCollection("collection2", 1, 1);
+    
+    waitForState("Expected collection1 to be created with 2 shards and 1 replica", "collection1", clusterShape(2, 2));
     waitForState("Expected collection2 to be created with 1 shard and 1 replica", "collection2", clusterShape(1, 1));
 
     new UpdateRequest()
@@ -495,6 +516,8 @@ public class AliasIntegrationTest extends SolrCloudTestCase {
     // test alias pointing to two collections.  collection2 first because it's not on every node
     CollectionAdminRequest.createAlias("testalias2", "collection2,collection1").process(cluster.getSolrClient());
 
+    Thread.sleep(100);
+    
     searchSeveralWays("testalias2", new SolrQuery("*:*"), 5);
 
     ///////////////
@@ -618,7 +641,9 @@ public class AliasIntegrationTest extends SolrCloudTestCase {
   @Test
   public void testErrorChecks() throws Exception {
     CollectionAdminRequest.createCollection("testErrorChecks-collection", "conf", 2, 1).process(cluster.getSolrClient());
-    waitForState("Expected testErrorChecks-collection to be created with 2 shards and 1 replica", "testErrorChecks-collection", clusterShape(2, 1));
+    
+    cluster.waitForActiveCollection("testErrorChecks-collection", 2, 2);
+    waitForState("Expected testErrorChecks-collection to be created with 2 shards and 1 replica", "testErrorChecks-collection", clusterShape(2, 2));
     
     ignoreException(".");
     

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/75b18319/solr/core/src/test/org/apache/solr/cloud/AssignBackwardCompatibilityTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/AssignBackwardCompatibilityTest.java b/solr/core/src/test/org/apache/solr/cloud/AssignBackwardCompatibilityTest.java
index 3a131a8..8700e14 100644
--- a/solr/core/src/test/org/apache/solr/cloud/AssignBackwardCompatibilityTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/AssignBackwardCompatibilityTest.java
@@ -56,8 +56,6 @@ public class AssignBackwardCompatibilityTest extends SolrCloudTestCase {
   }
 
   @Test
-  //05-Jul-2018 @BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 21-May-2018
-  @BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 2-Aug-2018
   public void test() throws IOException, SolrServerException, KeeperException, InterruptedException {
     Set<String> coreNames = new HashSet<>();
     Set<String> coreNodeNames = new HashSet<>();
@@ -81,6 +79,7 @@ public class AssignBackwardCompatibilityTest extends SolrCloudTestCase {
         DocCollection dc = getCollectionState(COLLECTION);
         Replica replica = getRandomReplica(dc.getSlice("shard1"), (r) -> r.getState() == Replica.State.ACTIVE);
         CollectionAdminRequest.deleteReplica(COLLECTION, "shard1", replica.getName()).process(cluster.getSolrClient());
+        coreNames.remove(replica.getCoreName());
         numLiveReplicas--;
       } else {
         CollectionAdminResponse response = CollectionAdminRequest.addReplicaToShard(COLLECTION, "shard1")

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/75b18319/solr/core/src/test/org/apache/solr/cloud/AsyncCallRequestStatusResponseTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/AsyncCallRequestStatusResponseTest.java b/solr/core/src/test/org/apache/solr/cloud/AsyncCallRequestStatusResponseTest.java
index 7464c87..cdadfd3 100644
--- a/solr/core/src/test/org/apache/solr/cloud/AsyncCallRequestStatusResponseTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/AsyncCallRequestStatusResponseTest.java
@@ -40,7 +40,7 @@ public class AsyncCallRequestStatusResponseTest extends SolrCloudTestCase {
     String asyncId =
         CollectionAdminRequest.createCollection("asynccall", "conf", 2, 1).processAsync(cluster.getSolrClient());
 
-    waitForState("Expected collection 'asynccall' to have 2 shards and 1 replica", "asynccall", clusterShape(2, 1));
+    waitForState("Expected collection 'asynccall' to have 2 shards and 1 replica", "asynccall", clusterShape(2, 2));
 
     int tries = 0;
     while (true) {

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/75b18319/solr/core/src/test/org/apache/solr/cloud/BasicDistributedZk2Test.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/BasicDistributedZk2Test.java b/solr/core/src/test/org/apache/solr/cloud/BasicDistributedZk2Test.java
index 6b03824..b67be48 100644
--- a/solr/core/src/test/org/apache/solr/cloud/BasicDistributedZk2Test.java
+++ b/solr/core/src/test/org/apache/solr/cloud/BasicDistributedZk2Test.java
@@ -67,7 +67,7 @@ public class BasicDistributedZk2Test extends AbstractFullDistribZkTestBase {
 
   @Override
   protected boolean useTlogReplicas() {
-    return onlyLeaderIndexes;
+    return false; // TODO: tlog replicas makes commits take way to long due to what is likely a bug and it's TestInjection use
   }
   
   @Test
@@ -351,7 +351,7 @@ public class BasicDistributedZk2Test extends AbstractFullDistribZkTestBase {
     // query("q","matchesnothing","fl","*,score", "debugQuery", "true");
     
     // this should trigger a recovery phase on deadShard
-    ChaosMonkey.start(deadShard.jetty);
+    deadShard.jetty.start();
     
     // make sure we have published we are recovering
     Thread.sleep(1500);
@@ -381,7 +381,7 @@ public class BasicDistributedZk2Test extends AbstractFullDistribZkTestBase {
     
     Thread.sleep(1500);
     
-    ChaosMonkey.start(deadShard.jetty);
+    deadShard.jetty.start();
     
     // make sure we have published we are recovering
     Thread.sleep(1500);

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/75b18319/solr/core/src/test/org/apache/solr/cloud/BasicDistributedZkTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/BasicDistributedZkTest.java b/solr/core/src/test/org/apache/solr/cloud/BasicDistributedZkTest.java
index ccc6528..c95ae85 100644
--- a/solr/core/src/test/org/apache/solr/cloud/BasicDistributedZkTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/BasicDistributedZkTest.java
@@ -28,12 +28,16 @@ import java.util.Map;
 import java.util.Set;
 import java.util.concurrent.Callable;
 import java.util.concurrent.CompletionService;
+import java.util.concurrent.CountDownLatch;
 import java.util.concurrent.ExecutorCompletionService;
 import java.util.concurrent.Future;
 import java.util.concurrent.SynchronousQueue;
 import java.util.concurrent.ThreadPoolExecutor;
 import java.util.concurrent.TimeUnit;
+import java.util.concurrent.TimeoutException;
 import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.atomic.AtomicLong;
+import java.util.concurrent.atomic.AtomicReference;
 
 import org.apache.commons.lang.StringUtils;
 import org.apache.lucene.util.IOUtils;
@@ -74,7 +78,9 @@ import org.apache.solr.common.params.UpdateParams;
 import org.apache.solr.common.util.ExecutorUtil;
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.util.DefaultSolrThreadFactory;
-import org.apache.solr.util.RTimer;
+import org.apache.solr.util.TestInjection;
+import org.apache.solr.util.TestInjection.Hook;
+import org.junit.BeforeClass;
 import org.junit.Test;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -86,7 +92,6 @@ import org.slf4j.LoggerFactory;
  */
 @Slow 
 @SuppressSSL(bugUrl = "https://issues.apache.org/jira/browse/SOLR-5776")
-// DO NOT ENABLE @LuceneTestCase.BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 2018-06-18
 public class BasicDistributedZkTest extends AbstractFullDistribZkTestBase {
 
   private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
@@ -94,6 +99,7 @@ public class BasicDistributedZkTest extends AbstractFullDistribZkTestBase {
   private static final String DEFAULT_COLLECTION = "collection1";
 
   private final boolean onlyLeaderIndexes = random().nextBoolean();
+  
   String t1="a_t";
   String i1="a_i1";
   String tlong = "other_tl1";
@@ -108,13 +114,37 @@ public class BasicDistributedZkTest extends AbstractFullDistribZkTestBase {
   
   private AtomicInteger nodeCounter = new AtomicInteger();
   
-  ThreadPoolExecutor executor = new ExecutorUtil.MDCAwareThreadPoolExecutor(0,
-      Integer.MAX_VALUE, 5, TimeUnit.SECONDS, new SynchronousQueue<Runnable>(),
-      new DefaultSolrThreadFactory("testExecutor"));
-  
   CompletionService<Object> completionService;
   Set<Future<Object>> pending;
   
+  private static Hook newSearcherHook = new Hook() {
+    volatile CountDownLatch latch;
+    AtomicReference<String> collection = new AtomicReference<>();
+
+    @Override
+    public void newSearcher(String collectionName) {
+      String c = collection.get();
+      if (c  != null && c.equals(collectionName)) {
+        log.info("Hook detected newSearcher");
+        try {
+          latch.countDown();
+        } catch (NullPointerException e) {
+
+        }
+      }
+    }
+  
+    public void waitForSearcher(String collection, int cnt, int timeoutms, boolean failOnTimeout) throws InterruptedException {
+      latch = new CountDownLatch(cnt);
+      this.collection.set(collection);
+      boolean timeout = !latch.await(timeoutms, TimeUnit.MILLISECONDS);
+      if (timeout && failOnTimeout) {
+        fail("timed out waiting for new searcher event " + latch.getCount());
+      }
+    }
+  
+  };
+  
   public BasicDistributedZkTest() {
     // we need DVs on point fields to compute stats & facets
     if (Boolean.getBoolean(NUMERIC_POINTS_SYSPROP)) System.setProperty(NUMERIC_DOCVALUES_SYSPROP,"true");
@@ -124,10 +154,15 @@ public class BasicDistributedZkTest extends AbstractFullDistribZkTestBase {
     pending = new HashSet<>();
     
   }
+  
+  @BeforeClass
+  public static void beforeBDZKTClass() {
+    TestInjection.newSearcherHook(newSearcherHook);
+  }
 
   @Override
   protected boolean useTlogReplicas() {
-    return onlyLeaderIndexes;
+    return false; // TODO: tlog replicas makes commits take way to long due to what is likely a bug and it's TestInjection use
   }
 
   @Override
@@ -149,8 +184,6 @@ public class BasicDistributedZkTest extends AbstractFullDistribZkTestBase {
 
   @Test
   @ShardsFixed(num = 4)
-  //DO NOT ENABLE @BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 12-Jun-2018
-  @BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 14-Oct-2018
   public void test() throws Exception {
     // setLoggingLevel(null);
 
@@ -345,23 +378,33 @@ public class BasicDistributedZkTest extends AbstractFullDistribZkTestBase {
     params.set("commitWithin", 10);
     add(cloudClient, params , getDoc("id", 300), getDoc("id", 301));
 
-    waitForDocCount(before + 2, 30000, "add commitWithin did not work");
+    newSearcherHook.waitForSearcher(DEFAULT_COLLECTION, 2, 20000, false);
+    
+    ClusterState clusterState = getCommonCloudSolrClient().getZkStateReader().getClusterState();
+    DocCollection dColl = clusterState.getCollection(DEFAULT_COLLECTION);
+
+    assertSliceCounts("should have found 2 docs, 300 and 301", before + 2, dColl);
 
     // try deleteById commitWithin
     UpdateRequest deleteByIdReq = new UpdateRequest();
     deleteByIdReq.deleteById("300");
     deleteByIdReq.setCommitWithin(10);
     deleteByIdReq.process(cloudClient);
+    
+    newSearcherHook.waitForSearcher(DEFAULT_COLLECTION, 2, 20000, false);
 
-    waitForDocCount(before + 1, 30000, "deleteById commitWithin did not work");
-
+    assertSliceCounts("deleteById commitWithin did not work", before + 1, dColl);
+    
     // try deleteByQuery commitWithin
     UpdateRequest deleteByQueryReq = new UpdateRequest();
     deleteByQueryReq.deleteByQuery("id:301");
     deleteByQueryReq.setCommitWithin(10);
     deleteByQueryReq.process(cloudClient);
 
-    waitForDocCount(before, 30000, "deleteByQuery commitWithin did not work");
+    newSearcherHook.waitForSearcher(DEFAULT_COLLECTION, 2, 20000, false);
+    
+    assertSliceCounts("deleteByQuery commitWithin did not work", before, dColl);
+    
 
     // TODO: This test currently fails because debug info is obtained only
     // on shards with matches.
@@ -384,24 +427,41 @@ public class BasicDistributedZkTest extends AbstractFullDistribZkTestBase {
     testStopAndStartCoresInOneInstance();
   }
 
-  // Insure that total docs found is the expected number.
-  private void waitForDocCount(long expectedNumFound, long waitMillis, String failureMessage)
-      throws Exception {
-    RTimer timer = new RTimer();
-    long timeout = (long)timer.getTime() + waitMillis;
+  private void assertSliceCounts(String msg, long expected, DocCollection dColl) throws Exception {
+    long found = checkSlicesSameCounts(dColl);
     
-    ClusterState clusterState = getCommonCloudSolrClient().getZkStateReader().getClusterState();
-    DocCollection dColl = clusterState.getCollection(DEFAULT_COLLECTION);
-    long docTotal = -1; // Could use this for 0 hits too!
+    if (found != expected) {
+      // we get one do over in a bad race
+      Thread.sleep(1000);
+      found = checkSlicesSameCounts(dColl);
+    }
     
-    while (docTotal != expectedNumFound && timeout > (long) timer.getTime()) {
-      docTotal = checkSlicesSameCounts(dColl);
-      if (docTotal != expectedNumFound) {
-        Thread.sleep(100);
-      }
+    assertEquals(msg, expected, checkSlicesSameCounts(dColl));
+  }
+
+  // Ensure that total docs found is the expected number.
+  private void waitForDocCount(long expectedNumFound, long waitMillis, String failureMessage)
+      throws Exception {
+    AtomicLong total = new AtomicLong(-1);
+    try {
+      getCommonCloudSolrClient().getZkStateReader().waitForState(DEFAULT_COLLECTION, waitMillis, TimeUnit.MILLISECONDS, (n, c) -> {
+        long docTotal;
+        try {
+          docTotal = checkSlicesSameCounts(c);
+        } catch (SolrServerException | IOException e) {
+          throw new RuntimeException(e);
+        }
+        total.set(docTotal);
+        if (docTotal == expectedNumFound) {
+          return true;
+        }
+        return false;
+      });
+    } catch (TimeoutException | InterruptedException e) {
+     
     }
     // We could fail here if we broke out of the above because we exceeded the time allowed.
-    assertEquals(failureMessage, expectedNumFound, docTotal);
+    assertEquals(failureMessage, expectedNumFound, total.get());
 
     // This should be redundant, but it caught a test error after all.
     for (SolrClient client : clients) {
@@ -557,11 +617,10 @@ public class BasicDistributedZkTest extends AbstractFullDistribZkTestBase {
       }
     }
     
-    ChaosMonkey.stop(cloudJettys.get(0).jetty);
+    cloudJettys.get(0).jetty.stop();
     printLayout();
 
-    Thread.sleep(5000);
-    ChaosMonkey.start(cloudJettys.get(0).jetty);
+    cloudJettys.get(0).jetty.start();
     cloudClient.getZkStateReader().forceUpdateCollection("multiunload2");
     try {
       cloudClient.getZkStateReader().getLeaderRetry("multiunload2", "shard1", 30000);
@@ -803,6 +862,8 @@ public class BasicDistributedZkTest extends AbstractFullDistribZkTestBase {
       for (String coreName : resp.getCollectionCoresStatus().keySet()) {
         collectionClients.add(createNewSolrClient(coreName, jettys.get(0).getBaseUrl().toString()));
       }
+      
+      
     }
     
     SolrClient client1 = collectionClients.get(0);
@@ -863,15 +924,36 @@ public class BasicDistributedZkTest extends AbstractFullDistribZkTestBase {
       unloadCmd.setCoreName(props.getCoreName());
 
       String leader = props.getCoreUrl();
-
-      unloadClient.request(unloadCmd);
-
-      int tries = 50;
-      while (leader.equals(zkStateReader.getLeaderUrl(oneInstanceCollection2, "shard1", 10000))) {
-        Thread.sleep(100);
-        if (tries-- == 0) {
-          fail("Leader never changed");
+      
+      testExecutor.execute(new Runnable() {
+        
+        @Override
+        public void run() {
+          try {
+            unloadClient.request(unloadCmd);
+          } catch (SolrServerException e) {
+            throw new RuntimeException(e);
+          } catch (IOException e) {
+            throw new RuntimeException(e);
+          }
         }
+      });
+
+      try {
+        getCommonCloudSolrClient().getZkStateReader().waitForState(oneInstanceCollection2, 20000, TimeUnit.MILLISECONDS, (n, c) -> {
+          
+ 
+          try {
+            if (leader.equals(zkStateReader.getLeaderUrl(oneInstanceCollection2, "shard1", 10000))) {
+              return false;
+            }
+          } catch (InterruptedException e) {
+            throw new RuntimeException(e);
+          }
+          return true;
+        });
+      } catch (TimeoutException | InterruptedException e) {
+        fail("Leader never changed");
       }
     }
 
@@ -1036,10 +1118,10 @@ public class BasicDistributedZkTest extends AbstractFullDistribZkTestBase {
 
     long collection2Docs = otherCollectionClients.get("collection2").get(0)
         .query(new SolrQuery("*:*")).getResults().getNumFound();
-    System.out.println("found2: "+ collection2Docs);
+
     long collection3Docs = otherCollectionClients.get("collection3").get(0)
         .query(new SolrQuery("*:*")).getResults().getNumFound();
-    System.out.println("found3: "+ collection3Docs);
+
     
     SolrQuery query = new SolrQuery("*:*");
     query.set("collection", "collection2,collection3");

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/75b18319/solr/core/src/test/org/apache/solr/cloud/BasicZkTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/BasicZkTest.java b/solr/core/src/test/org/apache/solr/cloud/BasicZkTest.java
index af3174d..d3fec26 100644
--- a/solr/core/src/test/org/apache/solr/cloud/BasicZkTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/BasicZkTest.java
@@ -115,7 +115,7 @@ public class BasicZkTest extends AbstractZkTestCase {
     
     // try a reconnect from disconnect
     zkServer = new ZkTestServer(zkDir, zkPort);
-    zkServer.run();
+    zkServer.run(false);
     
     Thread.sleep(300);
     

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/75b18319/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeyNothingIsSafeTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeyNothingIsSafeTest.java b/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeyNothingIsSafeTest.java
index 2b6584e..24d5217 100644
--- a/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeyNothingIsSafeTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeyNothingIsSafeTest.java
@@ -23,7 +23,6 @@ import java.util.Set;
 
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.LuceneTestCase.Slow;
-import org.apache.solr.SolrTestCaseJ4.SuppressObjectReleaseTracker;
 import org.apache.solr.SolrTestCaseJ4.SuppressSSL;
 import org.apache.solr.client.solrj.SolrQuery;
 import org.apache.solr.client.solrj.impl.CloudSolrClient;
@@ -35,8 +34,6 @@ import org.junit.Test;
 
 @Slow
 @SuppressSSL(bugUrl = "https://issues.apache.org/jira/browse/SOLR-5776")
-//@ThreadLeakLingering(linger = 60000)
-@SuppressObjectReleaseTracker(bugUrl="Testing purposes")
 public class ChaosMonkeyNothingIsSafeTest extends AbstractFullDistribZkTestBase {
   private static final int FAIL_TOLERANCE = 100;
 
@@ -48,6 +45,9 @@ public class ChaosMonkeyNothingIsSafeTest extends AbstractFullDistribZkTestBase
   public static void beforeSuperClass() {
     schemaString = "schema15.xml";      // we need a string id
     System.setProperty("solr.autoCommit.maxTime", "15000");
+    System.clearProperty("solr.httpclient.retries");
+    System.clearProperty("solr.retries.on.forward");
+    System.clearProperty("solr.retries.to.followers"); 
     setErrorHook();
   }
   
@@ -57,10 +57,22 @@ public class ChaosMonkeyNothingIsSafeTest extends AbstractFullDistribZkTestBase
     clearErrorHook();
   }
   
+  
+  
+  @Override
+  protected void destroyServers() throws Exception {
+    
+    super.destroyServers();
+  }
+  
   protected static final String[] fieldNames = new String[]{"f_i", "f_f", "f_d", "f_l", "f_dt"};
   protected static final RandVal[] randVals = new RandVal[]{rint, rfloat, rdouble, rlong, rdate};
 
   private int clientSoTimeout = 60000;
+
+  private volatile FullThrottleStoppableIndexingThread ftIndexThread;
+
+  private final boolean runFullThrottle;
   
   public String[] getFieldNames() {
     return fieldNames;
@@ -78,6 +90,16 @@ public class ChaosMonkeyNothingIsSafeTest extends AbstractFullDistribZkTestBase
     useFactory("solr.StandardDirectoryFactory");
   }
   
+  @Override
+  public void distribTearDown() throws Exception {
+    try {
+      ftIndexThread.safeStop();
+    } catch (NullPointerException e) {
+      // okay
+    }
+    super.distribTearDown();
+  }
+  
   public ChaosMonkeyNothingIsSafeTest() {
     super();
     sliceCount = Integer.parseInt(System.getProperty("solr.tests.cloud.cm.slicecount", "-1"));
@@ -94,11 +116,15 @@ public class ChaosMonkeyNothingIsSafeTest extends AbstractFullDistribZkTestBase
     fixShardCount(numShards);
 
 
+    // TODO: we only do this sometimes so that we can sometimes compare against control,
+    // it's currently hard to know what requests failed when using ConcurrentSolrUpdateServer
+    runFullThrottle = random().nextBoolean();
+    
   }
 
   @Override
   protected boolean useTlogReplicas() {
-    return onlyLeaderIndexes;
+    return false; // TODO: tlog replicas makes commits take way to long due to what is likely a bug and it's TestInjection use
   }
 
   @Override
@@ -119,9 +145,9 @@ public class ChaosMonkeyNothingIsSafeTest extends AbstractFullDistribZkTestBase
     // None of the operations used here are particularly costly, so this should work.
     // Using this low timeout will also help us catch index stalling.
     clientSoTimeout = 5000;
-    cloudClient = createCloudClient(DEFAULT_COLLECTION);
+
     boolean testSuccessful = false;
-    try {
+    try  (CloudSolrClient ourCloudClient = createCloudClient(DEFAULT_COLLECTION)) {
       handle.clear();
       handle.put("timestamp", SKIPVAL);
       ZkStateReader zkStateReader = cloudClient.getZkStateReader();
@@ -155,13 +181,9 @@ public class ChaosMonkeyNothingIsSafeTest extends AbstractFullDistribZkTestBase
         searchThread.start();
       }
       
-      // TODO: we only do this sometimes so that we can sometimes compare against control,
-      // it's currently hard to know what requests failed when using ConcurrentSolrUpdateServer
-      boolean runFullThrottle = random().nextBoolean();
       if (runFullThrottle) {
-        FullThrottleStoppableIndexingThread ftIndexThread = 
-            new FullThrottleStoppableIndexingThread(controlClient, cloudClient, clients, "ft1", true, this.clientSoTimeout);
-        threads.add(ftIndexThread);
+        ftIndexThread = 
+            new FullThrottleStoppableIndexingThread(cloudClient.getHttpClient(),controlClient, cloudClient, clients, "ft1", true, this.clientSoTimeout);
         ftIndexThread.start();
       }
       
@@ -189,6 +211,11 @@ public class ChaosMonkeyNothingIsSafeTest extends AbstractFullDistribZkTestBase
       // ideally this should go into chaosMonkey
       restartZk(1000 * (5 + random().nextInt(4)));
 
+      
+      if (runFullThrottle) {
+        ftIndexThread.safeStop();
+      }
+      
       for (StoppableThread indexThread : threads) {
         indexThread.safeStop();
       }
@@ -219,7 +246,6 @@ public class ChaosMonkeyNothingIsSafeTest extends AbstractFullDistribZkTestBase
       zkStateReader.updateLiveNodes();
       assertTrue(zkStateReader.getClusterState().getLiveNodes().size() > 0);
       
-      
       // we expect full throttle fails, but cloud client should not easily fail
       for (StoppableThread indexThread : threads) {
         if (indexThread instanceof StoppableIndexingThread && !(indexThread instanceof FullThrottleStoppableIndexingThread)) {
@@ -230,6 +256,10 @@ public class ChaosMonkeyNothingIsSafeTest extends AbstractFullDistribZkTestBase
       }
       
       
+      waitForThingsToLevelOut(20);
+      
+      commit();
+      
       Set<String> addFails = getAddFails(indexTreads);
       Set<String> deleteFails = getDeleteFails(indexTreads);
       // full throttle thread can
@@ -253,7 +283,7 @@ public class ChaosMonkeyNothingIsSafeTest extends AbstractFullDistribZkTestBase
 
       // sometimes we restart zookeeper as well
       if (random().nextBoolean()) {
-        restartZk(1000 * (5 + random().nextInt(4)));
+       // restartZk(1000 * (5 + random().nextInt(4)));
       }
 
       try (CloudSolrClient client = createCloudClient("collection1", 30000)) {

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/75b18319/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeyNothingIsSafeWithPullReplicasTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeyNothingIsSafeWithPullReplicasTest.java b/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeyNothingIsSafeWithPullReplicasTest.java
index 67668c9..a63dee3 100644
--- a/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeyNothingIsSafeWithPullReplicasTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeyNothingIsSafeWithPullReplicasTest.java
@@ -25,7 +25,6 @@ import java.util.Set;
 import java.util.concurrent.TimeUnit;
 
 import org.apache.lucene.util.LuceneTestCase.Slow;
-import org.apache.solr.SolrTestCaseJ4.SuppressObjectReleaseTracker;
 import org.apache.solr.SolrTestCaseJ4.SuppressSSL;
 import org.apache.solr.client.solrj.SolrQuery;
 import org.apache.solr.client.solrj.impl.CloudSolrClient;
@@ -43,12 +42,8 @@ import org.junit.Test;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import com.carrotsearch.randomizedtesting.annotations.ThreadLeakLingering;
-
 @Slow
 @SuppressSSL(bugUrl = "https://issues.apache.org/jira/browse/SOLR-5776")
-@ThreadLeakLingering(linger = 60000)
-@SuppressObjectReleaseTracker(bugUrl="Testing purposes")
 public class ChaosMonkeyNothingIsSafeWithPullReplicasTest extends AbstractFullDistribZkTestBase {
   private static final int FAIL_TOLERANCE = 100;
 
@@ -71,6 +66,9 @@ public class ChaosMonkeyNothingIsSafeWithPullReplicasTest extends AbstractFullDi
     if (usually()) {
       System.setProperty("solr.autoCommit.maxTime", "15000");
     }
+    System.clearProperty("solr.httpclient.retries");
+    System.clearProperty("solr.retries.on.forward");
+    System.clearProperty("solr.retries.to.followers"); 
     TestInjection.waitForReplicasInSync = null;
     setErrorHook();
   }
@@ -85,7 +83,11 @@ public class ChaosMonkeyNothingIsSafeWithPullReplicasTest extends AbstractFullDi
   protected static final String[] fieldNames = new String[]{"f_i", "f_f", "f_d", "f_l", "f_dt"};
   protected static final RandVal[] randVals = new RandVal[]{rint, rfloat, rdouble, rlong, rdate};
 
-  private int clientSoTimeout = 60000;
+  private int clientSoTimeout;
+
+  private volatile FullThrottleStoppableIndexingThread ftIndexThread;
+
+  private final boolean runFullThrottle;
   
   public String[] getFieldNames() {
     return fieldNames;
@@ -103,6 +105,16 @@ public class ChaosMonkeyNothingIsSafeWithPullReplicasTest extends AbstractFullDi
     useFactory("solr.StandardDirectoryFactory");
   }
   
+  @Override
+  public void distribTearDown() throws Exception {
+    try {
+      ftIndexThread.safeStop();
+    } catch (NullPointerException e) {
+      // okay
+    }
+    super.distribTearDown();
+  }
+  
   public ChaosMonkeyNothingIsSafeWithPullReplicasTest() {
     super();
     numPullReplicas = random().nextInt(TEST_NIGHTLY ? 2 : 1) + 1;
@@ -116,12 +128,12 @@ public class ChaosMonkeyNothingIsSafeWithPullReplicasTest extends AbstractFullDi
     fixShardCount(numNodes);
     log.info("Starting ChaosMonkey test with {} shards and {} nodes", sliceCount, numNodes);
 
-
+    runFullThrottle = random().nextBoolean();
   }
 
   @Override
   protected boolean useTlogReplicas() {
-    return useTlogReplicas;
+    return false; // TODO: tlog replicas makes commits take way to long due to what is likely a bug and it's TestInjection use
   }
   
   @Override
@@ -140,8 +152,8 @@ public class ChaosMonkeyNothingIsSafeWithPullReplicasTest extends AbstractFullDi
   public void test() throws Exception {
     // None of the operations used here are particularly costly, so this should work.
     // Using this low timeout will also help us catch index stalling.
-    clientSoTimeout = 5000;
-    cloudClient = createCloudClient(DEFAULT_COLLECTION);
+    clientSoTimeout = 8000;
+
     DocCollection docCollection = cloudClient.getZkStateReader().getClusterState().getCollection(DEFAULT_COLLECTION);
     assertEquals(this.sliceCount, docCollection.getSlices().size());
     Slice s = docCollection.getSlice("shard1");
@@ -162,9 +174,7 @@ public class ChaosMonkeyNothingIsSafeWithPullReplicasTest extends AbstractFullDi
       }      // make sure we again have leaders for each shard
       
       waitForRecoveriesToFinish(false);
-      
-      // we cannot do delete by query
-      // as it's not supported for recovery
+
       del("*:*");
       
       List<StoppableThread> threads = new ArrayList<>();
@@ -172,7 +182,7 @@ public class ChaosMonkeyNothingIsSafeWithPullReplicasTest extends AbstractFullDi
       int threadCount = TEST_NIGHTLY ? 3 : 1;
       int i = 0;
       for (i = 0; i < threadCount; i++) {
-        StoppableIndexingThread indexThread = new StoppableIndexingThread(controlClient, cloudClient, Integer.toString(i), true);
+        StoppableIndexingThread indexThread = new StoppableIndexingThread(controlClient, cloudClient, Integer.toString(i), true, 35, 1, true);
         threads.add(indexThread);
         indexTreads.add(indexThread);
         indexThread.start();
@@ -192,13 +202,9 @@ public class ChaosMonkeyNothingIsSafeWithPullReplicasTest extends AbstractFullDi
         commitThread.start();
       }
       
-      // TODO: we only do this sometimes so that we can sometimes compare against control,
-      // it's currently hard to know what requests failed when using ConcurrentSolrUpdateServer
-      boolean runFullThrottle = random().nextBoolean();
       if (runFullThrottle) {
-        FullThrottleStoppableIndexingThread ftIndexThread = 
-            new FullThrottleStoppableIndexingThread(controlClient, cloudClient, clients, "ft1", true, this.clientSoTimeout);
-        threads.add(ftIndexThread);
+        ftIndexThread = 
+            new FullThrottleStoppableIndexingThread(cloudClient.getHttpClient(), controlClient, cloudClient, clients, "ft1", true, this.clientSoTimeout);
         ftIndexThread.start();
       }
       
@@ -213,7 +219,7 @@ public class ChaosMonkeyNothingIsSafeWithPullReplicasTest extends AbstractFullDi
             runTimes = new int[] {5000, 6000, 10000, 15000, 25000, 30000,
                 30000, 45000, 90000, 120000};
           } else {
-            runTimes = new int[] {5000, 7000, 15000};
+            runTimes = new int[] {5000, 7000, 10000};
           }
           runLength = runTimes[random().nextInt(runTimes.length - 1)];
         }
@@ -225,6 +231,10 @@ public class ChaosMonkeyNothingIsSafeWithPullReplicasTest extends AbstractFullDi
       // ideally this should go into chaosMonkey
       restartZk(1000 * (5 + random().nextInt(4)));
 
+      if (runFullThrottle) {
+        ftIndexThread.safeStop();
+      }
+      
       for (StoppableThread indexThread : threads) {
         indexThread.safeStop();
       }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/75b18319/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeySafeLeaderTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeySafeLeaderTest.java b/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeySafeLeaderTest.java
index 27ed3a6..25ab99e 100644
--- a/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeySafeLeaderTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeySafeLeaderTest.java
@@ -38,6 +38,9 @@ public class ChaosMonkeySafeLeaderTest extends AbstractFullDistribZkTestBase {
   public static void beforeSuperClass() {
     schemaString = "schema15.xml";      // we need a string id
     System.setProperty("solr.autoCommit.maxTime", "15000");
+    System.clearProperty("solr.httpclient.retries");
+    System.clearProperty("solr.retries.on.forward");
+    System.clearProperty("solr.retries.to.followers"); 
     setErrorHook();
   }
   
@@ -81,7 +84,6 @@ public class ChaosMonkeySafeLeaderTest extends AbstractFullDistribZkTestBase {
   }
 
   @Test
-  // 29-June-2018  @BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028")
   public void test() throws Exception {
     
     handle.clear();
@@ -170,7 +172,7 @@ public class ChaosMonkeySafeLeaderTest extends AbstractFullDistribZkTestBase {
     if (random().nextBoolean()) {
       zkServer.shutdown();
       zkServer = new ZkTestServer(zkServer.getZkDir(), zkServer.getPort());
-      zkServer.run();
+      zkServer.run(false);
     }
 
     try (CloudSolrClient client = createCloudClient("collection1")) {

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/75b18319/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeySafeLeaderWithPullReplicasTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeySafeLeaderWithPullReplicasTest.java b/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeySafeLeaderWithPullReplicasTest.java
index 662a5d2..9055c10 100644
--- a/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeySafeLeaderWithPullReplicasTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeySafeLeaderWithPullReplicasTest.java
@@ -23,7 +23,6 @@ import java.util.List;
 import java.util.concurrent.TimeUnit;
 
 import org.apache.lucene.util.LuceneTestCase.Slow;
-import org.apache.solr.SolrTestCaseJ4.SuppressObjectReleaseTracker;
 import org.apache.solr.client.solrj.SolrQuery;
 import org.apache.solr.client.solrj.SolrServerException;
 import org.apache.solr.client.solrj.impl.CloudSolrClient;
@@ -42,7 +41,6 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 @Slow
-@SuppressObjectReleaseTracker(bugUrl="Testing purposes")
 public class ChaosMonkeySafeLeaderWithPullReplicasTest extends AbstractFullDistribZkTestBase {
   private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
   
@@ -60,7 +58,7 @@ public class ChaosMonkeySafeLeaderWithPullReplicasTest extends AbstractFullDistr
   
   @Override
   protected boolean useTlogReplicas() {
-    return useTlogReplicas;
+    return false; // TODO: tlog replicas makes commits take way to long due to what is likely a bug and it's TestInjection use
   }
 
   @BeforeClass
@@ -69,6 +67,9 @@ public class ChaosMonkeySafeLeaderWithPullReplicasTest extends AbstractFullDistr
     if (usually()) {
       System.setProperty("solr.autoCommit.maxTime", "15000");
     }
+    System.clearProperty("solr.httpclient.retries");
+    System.clearProperty("solr.retries.on.forward");
+    System.clearProperty("solr.retries.to.followers");
     TestInjection.waitForReplicasInSync = null;
     setErrorHook();
   }
@@ -99,8 +100,8 @@ public class ChaosMonkeySafeLeaderWithPullReplicasTest extends AbstractFullDistr
   
   public ChaosMonkeySafeLeaderWithPullReplicasTest() {
     super();
-    numPullReplicas = random().nextInt(TEST_NIGHTLY ? 3 : 2) + 1;;
-    numRealtimeOrTlogReplicas = random().nextInt(TEST_NIGHTLY ? 3 : 2) + 1;;
+    numPullReplicas = random().nextInt(TEST_NIGHTLY ? 3 : 2) + 1;
+    numRealtimeOrTlogReplicas = random().nextInt(TEST_NIGHTLY ? 3 : 2) + 1;
     sliceCount = Integer.parseInt(System.getProperty("solr.tests.cloud.cm.slicecount", "-1"));
     if (sliceCount == -1) {
       sliceCount = random().nextInt(TEST_NIGHTLY ? 3 : 2) + 1;
@@ -219,7 +220,7 @@ public class ChaosMonkeySafeLeaderWithPullReplicasTest extends AbstractFullDistr
     if (random().nextBoolean()) {
       zkServer.shutdown();
       zkServer = new ZkTestServer(zkServer.getZkDir(), zkServer.getPort());
-      zkServer.run();
+      zkServer.run(false);
     }
 
     try (CloudSolrClient client = createCloudClient("collection1")) {

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/75b18319/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeyShardSplitTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeyShardSplitTest.java b/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeyShardSplitTest.java
index 1a13652..50e2443 100644
--- a/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeyShardSplitTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeyShardSplitTest.java
@@ -36,10 +36,12 @@ import org.apache.solr.common.cloud.Slice;
 import org.apache.solr.common.cloud.SolrZkClient;
 import org.apache.solr.common.cloud.ZkStateReader;
 import org.apache.solr.core.CloudConfig;
+import org.apache.solr.handler.component.HttpShardHandler;
 import org.apache.solr.handler.component.HttpShardHandlerFactory;
 import org.apache.solr.update.UpdateShardHandler;
 import org.apache.solr.update.UpdateShardHandlerConfig;
 import org.apache.zookeeper.KeeperException;
+import org.junit.BeforeClass;
 import org.junit.Ignore;
 import org.junit.Test;
 import org.slf4j.Logger;
@@ -56,6 +58,13 @@ public class ChaosMonkeyShardSplitTest extends ShardSplitTest {
 
   static final int TIMEOUT = 10000;
   private AtomicInteger killCounter = new AtomicInteger();
+  
+  @BeforeClass
+  public static void beforeSuperClass() {
+    System.clearProperty("solr.httpclient.retries");
+    System.clearProperty("solr.retries.on.forward");
+    System.clearProperty("solr.retries.to.followers"); 
+  }
 
   @Test
   public void test() throws Exception {
@@ -100,7 +109,7 @@ public class ChaosMonkeyShardSplitTest extends ShardSplitTest {
 
       // kill the leader
       CloudJettyRunner leaderJetty = shardToLeaderJetty.get("shard1");
-      chaosMonkey.killJetty(leaderJetty);
+      leaderJetty.jetty.stop();
 
       Thread.sleep(2000);
 
@@ -122,7 +131,7 @@ public class ChaosMonkeyShardSplitTest extends ShardSplitTest {
       }
 
       // bring back dead node
-      ChaosMonkey.start(deadJetty.jetty); // he is not the leader anymore
+      deadJetty.jetty.start(); // he is not the leader anymore
 
       waitTillRecovered();
 
@@ -251,7 +260,7 @@ public class ChaosMonkeyShardSplitTest extends ShardSplitTest {
     LeaderElector overseerElector = new LeaderElector(zkClient);
     UpdateShardHandler updateShardHandler = new UpdateShardHandler(UpdateShardHandlerConfig.DEFAULT);
     // TODO: close Overseer
-    Overseer overseer = new Overseer(new HttpShardHandlerFactory().getShardHandler(), updateShardHandler, "/admin/cores",
+    Overseer overseer = new Overseer((HttpShardHandler) new HttpShardHandlerFactory().getShardHandler(), updateShardHandler, "/admin/cores",
         reader, null, new CloudConfig.CloudConfigBuilder("127.0.0.1", 8983, "solr").build());
     overseer.close();
     ElectionContext ec = new OverseerElectionContext(zkClient, overseer,

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/75b18319/solr/core/src/test/org/apache/solr/cloud/CleanupOldIndexTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/CleanupOldIndexTest.java b/solr/core/src/test/org/apache/solr/cloud/CleanupOldIndexTest.java
index 547de8d..efd8e6d 100644
--- a/solr/core/src/test/org/apache/solr/cloud/CleanupOldIndexTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/CleanupOldIndexTest.java
@@ -96,13 +96,13 @@ public class CleanupOldIndexTest extends SolrCloudTestCase {
     assertTrue(oldIndexDir2.isDirectory());
 
     // bring shard replica down
-    ChaosMonkey.stop(jetty);
+    jetty.stop();
 
     // wait a moment - lets allow some docs to be indexed so replication time is non 0
     Thread.sleep(waitTimes[random().nextInt(waitTimes.length - 1)]);
 
     // bring shard replica up
-    ChaosMonkey.start(jetty);
+    jetty.start();
 
     // make sure replication can start
     Thread.sleep(3000);

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/75b18319/solr/core/src/test/org/apache/solr/cloud/CloudTestUtils.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/CloudTestUtils.java b/solr/core/src/test/org/apache/solr/cloud/CloudTestUtils.java
index e93cd58..a1fccd2 100644
--- a/solr/core/src/test/org/apache/solr/cloud/CloudTestUtils.java
+++ b/solr/core/src/test/org/apache/solr/cloud/CloudTestUtils.java
@@ -136,12 +136,12 @@ public class CloudTestUtils {
                                                       boolean requireLeaders) {
     return (liveNodes, collectionState) -> {
       if (collectionState == null) {
-        log.trace("-- null collection");
+        log.info("-- null collection");
         return false;
       }
       Collection<Slice> slices = withInactive ? collectionState.getSlices() : collectionState.getActiveSlices();
       if (slices.size() != expectedShards) {
-        log.trace("-- wrong number of slices, expected={}, found={}: {}", expectedShards, collectionState.getSlices().size(), collectionState.getSlices());
+        log.info("-- wrong number of slices, expected={}, found={}: {}", expectedShards, collectionState.getSlices().size(), collectionState.getSlices());
         return false;
       }
       Set<String> leaderless = new HashSet<>();
@@ -160,14 +160,14 @@ public class CloudTestUtils {
             activeReplicas++;
         }
         if (activeReplicas != expectedReplicas) {
-          log.trace("-- wrong number of active replicas in slice {}, expected={}, found={}", slice.getName(), expectedReplicas, activeReplicas);
+          log.info("-- wrong number of active replicas in slice {}, expected={}, found={}", slice.getName(), expectedReplicas, activeReplicas);
           return false;
         }
       }
       if (leaderless.isEmpty()) {
         return true;
       } else {
-        log.trace("-- shards without leaders: {}", leaderless);
+        log.info("-- shards without leaders: {}", leaderless);
         return false;
       }
     };

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/75b18319/solr/core/src/test/org/apache/solr/cloud/ClusterStateUpdateTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/ClusterStateUpdateTest.java b/solr/core/src/test/org/apache/solr/cloud/ClusterStateUpdateTest.java
index 3658430..3ab04fa 100644
--- a/solr/core/src/test/org/apache/solr/cloud/ClusterStateUpdateTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/ClusterStateUpdateTest.java
@@ -22,6 +22,7 @@ import java.util.Map;
 import java.util.Set;
 
 import org.apache.lucene.util.LuceneTestCase.Slow;
+import org.apache.solr.client.solrj.embedded.JettySolrRunner;
 import org.apache.solr.client.solrj.request.CollectionAdminRequest;
 import org.apache.solr.common.cloud.ClusterState;
 import org.apache.solr.common.cloud.DocCollection;
@@ -44,7 +45,6 @@ public class ClusterStateUpdateTest extends SolrCloudTestCase  {
     configureCluster(3)
         .addConfig("conf", configset("cloud-minimal"))
         .configure();
-
   }
 
   @BeforeClass
@@ -112,7 +112,7 @@ public class ClusterStateUpdateTest extends SolrCloudTestCase  {
     assertEquals(3, liveNodes.size());
 
     // shut down node 2
-    cluster.stopJettySolrRunner(2);
+    JettySolrRunner j = cluster.stopJettySolrRunner(2);
 
     // slight pause (15s timeout) for watch to trigger
     for(int i = 0; i < (5 * 15); i++) {
@@ -121,6 +121,8 @@ public class ClusterStateUpdateTest extends SolrCloudTestCase  {
       }
       Thread.sleep(200);
     }
+    
+    cluster.waitForJettyToStop(j);
 
     assertEquals(2, zkController2.getClusterState().getLiveNodes().size());
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/75b18319/solr/core/src/test/org/apache/solr/cloud/CollectionStateFormat2Test.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/CollectionStateFormat2Test.java b/solr/core/src/test/org/apache/solr/cloud/CollectionStateFormat2Test.java
index 91eb461..04da1f5 100644
--- a/solr/core/src/test/org/apache/solr/cloud/CollectionStateFormat2Test.java
+++ b/solr/core/src/test/org/apache/solr/cloud/CollectionStateFormat2Test.java
@@ -20,6 +20,7 @@ import org.apache.solr.client.solrj.request.CollectionAdminRequest;
 import org.apache.solr.common.cloud.DocCollection;
 import org.apache.solr.common.cloud.ZkStateReader;
 import org.apache.zookeeper.data.Stat;
+import org.junit.After;
 import org.junit.BeforeClass;
 import org.junit.Test;
 
@@ -31,15 +32,12 @@ public class CollectionStateFormat2Test extends SolrCloudTestCase {
         .addConfig("conf", configset("cloud-minimal"))
         .configure();
   }
-
-  @Test
-  public void testConfNameAndCollectionNameSame() throws Exception {
-
-    // .system collection precreates the configset
-    CollectionAdminRequest.createCollection(".system", 2, 1)
-        .process(cluster.getSolrClient());
+  
+  @After
+  public void afterTest() throws Exception {
+    cluster.deleteAllCollections();
   }
-
+  
   @Test
   public void testZkNodeLocation() throws Exception {
 
@@ -47,6 +45,8 @@ public class CollectionStateFormat2Test extends SolrCloudTestCase {
     CollectionAdminRequest.createCollection(collectionName, "conf", 2, 2)
         .process(cluster.getSolrClient());
 
+    cluster.waitForActiveCollection(collectionName, 2, 4);
+    
     waitForState("Collection not created", collectionName, (n, c) -> DocCollection.isFullyActive(n, c, 2, 2));
     assertTrue("State Format 2 collection path does not exist",
         zkClient().exists(ZkStateReader.getCollectionPath(collectionName), true));

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/75b18319/solr/core/src/test/org/apache/solr/cloud/CollectionsAPISolrJTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/CollectionsAPISolrJTest.java b/solr/core/src/test/org/apache/solr/cloud/CollectionsAPISolrJTest.java
index 4c3022c..ef19728 100644
--- a/solr/core/src/test/org/apache/solr/cloud/CollectionsAPISolrJTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/CollectionsAPISolrJTest.java
@@ -16,6 +16,14 @@
  */
 package org.apache.solr.cloud;
 
+import static java.util.Arrays.asList;
+import static org.apache.solr.common.cloud.ZkStateReader.COLLECTION_DEF;
+import static org.apache.solr.common.cloud.ZkStateReader.NRT_REPLICAS;
+import static org.apache.solr.common.cloud.ZkStateReader.NUM_SHARDS_PROP;
+import static org.apache.solr.common.cloud.ZkStateReader.SOLR_AUTOSCALING_CONF_PATH;
+import static org.apache.solr.common.params.CollectionAdminParams.COLLECTION;
+import static org.apache.solr.common.params.CollectionAdminParams.DEFAULTS;
+
 import java.io.IOException;
 import java.nio.file.Path;
 import java.nio.file.Paths;
@@ -26,7 +34,6 @@ import java.util.Objects;
 import java.util.Optional;
 import java.util.concurrent.TimeUnit;
 
-import com.google.common.collect.ImmutableList;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.TestUtil;
 import org.apache.solr.client.solrj.SolrRequest;
@@ -51,33 +58,33 @@ import org.apache.solr.common.util.TimeSource;
 import org.apache.solr.common.util.Utils;
 import org.apache.solr.util.TimeOut;
 import org.apache.zookeeper.KeeperException;
+import org.junit.After;
 import org.junit.Before;
-import org.junit.BeforeClass;
 import org.junit.Test;
 
-import static java.util.Arrays.asList;
-import static org.apache.solr.common.cloud.ZkStateReader.COLLECTION_DEF;
-import static org.apache.solr.common.cloud.ZkStateReader.NRT_REPLICAS;
-import static org.apache.solr.common.cloud.ZkStateReader.NUM_SHARDS_PROP;
-import static org.apache.solr.common.cloud.ZkStateReader.SOLR_AUTOSCALING_CONF_PATH;
-import static org.apache.solr.common.params.CollectionAdminParams.COLLECTION;
-import static org.apache.solr.common.params.CollectionAdminParams.DEFAULTS;
+import com.google.common.collect.ImmutableList;
 
 @LuceneTestCase.Slow
 public class CollectionsAPISolrJTest extends SolrCloudTestCase {
 
-  @BeforeClass
-  public static void setupCluster() throws Exception {
-    configureCluster(4)
-        .addConfig("conf", configset("cloud-minimal"))
-        .configure();
-  }
-
   @Before
   public void beforeTest() throws Exception {
+    configureCluster(4)
+    .addConfig("conf", configset("cloud-minimal"))
+    .configure();
+    
     // clear any persisted auto scaling configuration
     zkClient().setData(SOLR_AUTOSCALING_CONF_PATH, Utils.toJSON(new ZkNodeProps()), true);
     cluster.deleteAllCollections();
+    
+    final ClusterProperties props = new ClusterProperties(zkClient());
+    CollectionAdminRequest.setClusterProperty(ZkStateReader.LEGACY_CLOUD, null).process(cluster.getSolrClient());
+    assertEquals("Cluster property was not unset", props.getClusterProperty(ZkStateReader.LEGACY_CLOUD, null), null);
+  }
+  
+  @After
+  public void afterTest() throws Exception {
+    shutdownCluster();
   }
 
   /**
@@ -89,6 +96,8 @@ public class CollectionsAPISolrJTest extends SolrCloudTestCase {
     String collectionName = "solrj_default_configset";
     CollectionAdminResponse response = CollectionAdminRequest.createCollection(collectionName, 2, 2)
         .process(cluster.getSolrClient());
+    
+    cluster.waitForActiveCollection(collectionName, 2, 4);
 
     assertEquals(0, response.getStatus());
     assertTrue(response.isSuccess());
@@ -135,6 +144,8 @@ public class CollectionsAPISolrJTest extends SolrCloudTestCase {
           .process(cluster.getSolrClient());
       assertEquals(0, response.getStatus());
       assertTrue(response.isSuccess());
+      
+      cluster.waitForActiveCollection(COLL_NAME, 2, 4);
 
       DocCollection coll = cluster.getSolrClient().getClusterStateProvider().getClusterState().getCollection(COLL_NAME);
       Map<String, Slice> slices = coll.getSlicesMap();
@@ -217,6 +228,7 @@ public class CollectionsAPISolrJTest extends SolrCloudTestCase {
           .process(cluster.getSolrClient());
       assertEquals(0, response.getStatus());
       assertTrue(response.isSuccess());
+      cluster.waitForActiveCollection(COLL_NAME, 2, 4);
 
       DocCollection coll = cluster.getSolrClient().getClusterStateProvider().getClusterState().getCollection(COLL_NAME);
       Map<String, Slice> slices = coll.getSlicesMap();
@@ -321,6 +333,9 @@ public class CollectionsAPISolrJTest extends SolrCloudTestCase {
 
     assertEquals(0, response.getStatus());
     assertTrue(response.isSuccess());
+    
+    cluster.waitForActiveCollection(collectionName, 2, 4);
+    
     String nodeName = (String) response._get("success[0]/key", null);
     String corename = (String) response._get(asList("success", nodeName, "core"), null);
 
@@ -333,7 +348,8 @@ public class CollectionsAPISolrJTest extends SolrCloudTestCase {
   }
 
   @Test
-  public void testCreateAndDeleteShard() throws IOException, SolrServerException {
+  @AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/SOLR-13021")
+  public void testCreateAndDeleteShard() throws Exception {
     // Create an implicit collection
     String collectionName = "solrj_implicit";
     CollectionAdminResponse response
@@ -343,6 +359,9 @@ public class CollectionsAPISolrJTest extends SolrCloudTestCase {
 
     assertEquals(0, response.getStatus());
     assertTrue(response.isSuccess());
+    
+    cluster.waitForActiveCollection(collectionName, 2, 6);
+    
     Map<String, NamedList<Integer>> coresStatus = response.getCollectionCoresStatus();
     assertEquals(6, coresStatus.size());
 
@@ -351,6 +370,9 @@ public class CollectionsAPISolrJTest extends SolrCloudTestCase {
 
     assertEquals(0, response.getStatus());
     assertTrue(response.isSuccess());
+    
+    cluster.getSolrClient().waitForState(collectionName, 30, TimeUnit.SECONDS, (l,c) -> c != null && c.getSlice("shardC") != null); 
+    
     coresStatus = response.getCollectionCoresStatus();
     assertEquals(3, coresStatus.size());
     int replicaTlog = 0;
@@ -395,6 +417,8 @@ public class CollectionsAPISolrJTest extends SolrCloudTestCase {
     CollectionAdminRequest.createCollection(collectionName, "conf", 2, 1)
         .process(cluster.getSolrClient());
 
+    cluster.waitForActiveCollection(collectionName, 2, 2);
+    
     CollectionAdminResponse response = CollectionAdminRequest.splitShard(collectionName)
         .setShardName("shard1")
         .process(cluster.getSolrClient());
@@ -450,6 +474,9 @@ public class CollectionsAPISolrJTest extends SolrCloudTestCase {
 
     assertEquals(0, response.getStatus());
     assertTrue(response.isSuccess());
+    
+    cluster.waitForActiveCollection(collectionName, 1, 1);
+    
     Map<String, NamedList<Integer>> coresStatus = response.getCollectionCoresStatus();
     assertEquals(1, coresStatus.size());
 
@@ -468,6 +495,8 @@ public class CollectionsAPISolrJTest extends SolrCloudTestCase {
     final String collectionName = "solrj_replicatests";
     CollectionAdminRequest.createCollection(collectionName, "conf", 1, 2)
         .process(cluster.getSolrClient());
+    
+    cluster.waitForActiveCollection(collectionName, 1, 2);
 
     ArrayList<String> nodeList
         = new ArrayList<>(cluster.getSolrClient().getZkStateReader().getClusterState().getLiveNodes());
@@ -477,6 +506,9 @@ public class CollectionsAPISolrJTest extends SolrCloudTestCase {
     CollectionAdminResponse response = CollectionAdminRequest.addReplicaToShard(collectionName, "shard1")
         .setNode(node)
         .process(cluster.getSolrClient());
+    
+    cluster.waitForActiveCollection(collectionName, 1, 3);
+    
     Replica newReplica = grabNewReplica(response, getCollectionState(collectionName));
     assertEquals(0, response.getStatus());
     assertTrue(response.isSuccess());
@@ -533,6 +565,8 @@ public class CollectionsAPISolrJTest extends SolrCloudTestCase {
 
     CollectionAdminRequest.createCollection(collectionName, "conf", 2, 2)
         .process(cluster.getSolrClient());
+    
+    cluster.waitForActiveCollection(collectionName, 2, 4);
 
     // Check for value change
     CollectionAdminRequest.setCollectionProperty(collectionName, propName, "false")
@@ -578,6 +612,8 @@ public class CollectionsAPISolrJTest extends SolrCloudTestCase {
     final String collection = "replicaProperties";
     CollectionAdminRequest.createCollection(collection, "conf", 2, 2)
         .process(cluster.getSolrClient());
+    
+    cluster.waitForActiveCollection(collection, 2, 4);
 
     final Replica replica = getCollectionState(collection).getLeader("shard1");
     CollectionAdminResponse response
@@ -604,6 +640,8 @@ public class CollectionsAPISolrJTest extends SolrCloudTestCase {
     final String collection = "balancedProperties";
     CollectionAdminRequest.createCollection(collection, "conf", 2, 2)
         .process(cluster.getSolrClient());
+    
+   cluster.waitForActiveCollection(collection, 2, 4);
 
     CollectionAdminResponse response = CollectionAdminRequest.balanceReplicaProperty(collection, "preferredLeader")
         .process(cluster.getSolrClient());
@@ -629,6 +667,8 @@ public class CollectionsAPISolrJTest extends SolrCloudTestCase {
     final String collection = "testAddAndDeleteCollectionAttribute";
     CollectionAdminRequest.createCollection(collection, "conf", 1, 1)
         .process(cluster.getSolrClient());
+    
+    cluster.waitForActiveCollection(collection, 1, 1);
 
     CollectionAdminRequest.modifyCollection(collection, null)
         .setAttribute("replicationFactor", 25)

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/75b18319/solr/core/src/test/org/apache/solr/cloud/ConnectionManagerTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/ConnectionManagerTest.java b/solr/core/src/test/org/apache/solr/cloud/ConnectionManagerTest.java
index 90d9cc1..6684d34 100644
--- a/solr/core/src/test/org/apache/solr/cloud/ConnectionManagerTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/ConnectionManagerTest.java
@@ -47,9 +47,6 @@ public class ConnectionManagerTest extends SolrTestCaseJ4 {
     try {
       server.run();
       
-      AbstractZkTestCase.tryCleanSolrZkNode(server.getZkHost());
-      AbstractZkTestCase.makeSolrZkNode(server.getZkHost());
-      
       SolrZkClient zkClient = new SolrZkClient(server.getZkAddress(), TIMEOUT);
       ConnectionManager cm = zkClient.getConnectionManager();
       try {
@@ -80,33 +77,30 @@ public class ConnectionManagerTest extends SolrTestCaseJ4 {
     try {
       server.run();
 
-      AbstractZkTestCase.tryCleanSolrZkNode(server.getZkHost());
-      AbstractZkTestCase.makeSolrZkNode(server.getZkHost());
-
       SolrZkClient zkClient = new SolrZkClient(server.getZkAddress(), TIMEOUT);
       ConnectionManager cm = zkClient.getConnectionManager();
       try {
         assertFalse(cm.isLikelyExpired());
-        assertTrue(cm.isConnected());
+        assertTrue(cm.isConnectedAndNotClosed());
         cm.process(new WatchedEvent(EventType.None, KeeperState.Disconnected, ""));
         // disconnect shouldn't immediately set likelyExpired
-        assertFalse(cm.isConnected());
+        assertFalse(cm.isConnectedAndNotClosed());
         assertFalse(cm.isLikelyExpired());
 
         // but it should after the timeout
         Thread.sleep((long)(zkClient.getZkClientTimeout() * 1.5));
-        assertFalse(cm.isConnected());
+        assertFalse(cm.isConnectedAndNotClosed());
         assertTrue(cm.isLikelyExpired());
 
         // even if we disconnect immediately again
         cm.process(new WatchedEvent(EventType.None, KeeperState.Disconnected, ""));
-        assertFalse(cm.isConnected());
+        assertFalse(cm.isConnectedAndNotClosed());
         assertTrue(cm.isLikelyExpired());
 
         // reconnect -- should no longer be likely expired
         cm.process(new WatchedEvent(EventType.None, KeeperState.SyncConnected, ""));
         assertFalse(cm.isLikelyExpired());
-        assertTrue(cm.isConnected());
+        assertTrue(cm.isConnectedAndNotClosed());
       } finally {
         cm.close();
         zkClient.close();
@@ -126,9 +120,6 @@ public class ConnectionManagerTest extends SolrTestCaseJ4 {
     ZkTestServer server = new ZkTestServer(zkDir);
     try {
       server.run();
-
-      AbstractZkTestCase.tryCleanSolrZkNode(server.getZkHost());
-      AbstractZkTestCase.makeSolrZkNode(server.getZkHost());
       
       MockZkClientConnectionStrategy strat = new MockZkClientConnectionStrategy();
       SolrZkClient zkClient = new SolrZkClient(server.getZkAddress(), TIMEOUT, strat , null);
@@ -136,12 +127,12 @@ public class ConnectionManagerTest extends SolrTestCaseJ4 {
       
       try {
         assertFalse(cm.isLikelyExpired());
-        assertTrue(cm.isConnected());
+        assertTrue(cm.isConnectedAndNotClosed());
                
         // reconnect -- should no longer be likely expired
         cm.process(new WatchedEvent(EventType.None, KeeperState.Expired, ""));
         assertFalse(cm.isLikelyExpired());
-        assertTrue(cm.isConnected());
+        assertTrue(cm.isConnectedAndNotClosed());
         assertTrue(strat.isExceptionThrow());
       } finally {
         cm.close();