You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ma...@apache.org on 2018/11/30 18:10:35 UTC

[08/17] lucene-solr:branch_7x: SOLR-12801: Make massive improvements to the tests.

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/eb652b84/solr/core/src/test/org/apache/solr/cloud/TestWithCollection.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/TestWithCollection.java b/solr/core/src/test/org/apache/solr/cloud/TestWithCollection.java
index 52e659a..15a32da 100644
--- a/solr/core/src/test/org/apache/solr/cloud/TestWithCollection.java
+++ b/solr/core/src/test/org/apache/solr/cloud/TestWithCollection.java
@@ -17,11 +17,13 @@
 
 package org.apache.solr.cloud;
 
+import static org.apache.solr.cloud.autoscaling.AutoScalingHandlerTest.createAutoScalingRequest;
+import static org.apache.solr.common.params.CollectionAdminParams.WITH_COLLECTION;
+
 import java.io.IOException;
 import java.lang.invoke.MethodHandles;
 import java.nio.charset.StandardCharsets;
 import java.util.List;
-import java.util.Map;
 import java.util.Optional;
 import java.util.concurrent.CountDownLatch;
 import java.util.concurrent.TimeUnit;
@@ -39,21 +41,18 @@ import org.apache.solr.cloud.autoscaling.ComputePlanAction;
 import org.apache.solr.cloud.autoscaling.ExecutePlanAction;
 import org.apache.solr.cloud.autoscaling.TriggerActionBase;
 import org.apache.solr.cloud.autoscaling.TriggerEvent;
-import org.apache.solr.common.cloud.ClusterState;
 import org.apache.solr.common.cloud.DocCollection;
 import org.apache.solr.common.cloud.Replica;
 import org.apache.solr.common.cloud.ZkStateReader;
 import org.apache.solr.common.util.TimeSource;
 import org.apache.solr.util.LogLevel;
 import org.apache.solr.util.TimeOut;
-import org.junit.BeforeClass;
+import org.junit.After;
+import org.junit.Before;
 import org.junit.Test;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import static org.apache.solr.cloud.autoscaling.AutoScalingHandlerTest.createAutoScalingRequest;
-import static org.apache.solr.common.params.CollectionAdminParams.WITH_COLLECTION;
-
 /**
  * Tests for co-locating a collection with another collection such that any Collection API
  * always ensures that the co-location is never broken.
@@ -68,30 +67,16 @@ public class TestWithCollection extends SolrCloudTestCase {
 
   private static final int NUM_JETTIES = 2;
 
-  @BeforeClass
-  public static void setupCluster() throws Exception {
+  @Before
+  public void setupCluster() throws Exception {
     configureCluster(NUM_JETTIES)
         .addConfig("conf", configset("cloud-minimal"))
         .configure();
-  }
 
-  @Override
-  public void setUp() throws Exception {
-    super.setUp();
     if (zkClient().exists(ZkStateReader.SOLR_AUTOSCALING_CONF_PATH, true))  {
       zkClient().setData(ZkStateReader.SOLR_AUTOSCALING_CONF_PATH, "{}".getBytes(StandardCharsets.UTF_8), true);
     }
-    ClusterState clusterState = cluster.getSolrClient().getZkStateReader().getClusterState();
-    for (Map.Entry<String, ClusterState.CollectionRef> entry : clusterState.getCollectionStates().entrySet()) {
-      if (entry.getKey().contains("_xyz"))  {
-        try {
-          CollectionAdminRequest.deleteCollection(entry.getKey()).process(cluster.getSolrClient());
-        } catch (Exception e) {
-          log.error("Exception while deleting collection: " + entry.getKey());
-        }
-      }
-    }
-    cluster.deleteAllCollections();
+
     cluster.getSolrClient().setDefaultCollection(null);
 
     cloudManager = cluster.getJettySolrRunner(0).getCoreContainer().getZkController().getSolrCloudManager();
@@ -100,18 +85,11 @@ public class TestWithCollection extends SolrCloudTestCase {
     deleteChildrenRecursively(ZkStateReader.SOLR_AUTOSCALING_NODE_LOST_PATH);
     deleteChildrenRecursively(ZkStateReader.SOLR_AUTOSCALING_NODE_ADDED_PATH);
     LATCH = new CountDownLatch(1);
-
-    int jettys = cluster.getJettySolrRunners().size();
-    if (jettys < NUM_JETTIES) {
-      for (int i = jettys; i < NUM_JETTIES; i++) {
-        cluster.startJettySolrRunner();
-      }
-    } else  {
-      for (int i = jettys; i > NUM_JETTIES; i--) {
-        cluster.stopJettySolrRunner(i - 1);
-      }
-    }
-    cluster.waitForAllNodes(30);
+  }
+  
+  @After
+  public void teardownCluster() throws Exception {
+    shutdownCluster();
   }
 
   private void deleteChildrenRecursively(String path) throws Exception {

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/eb652b84/solr/core/src/test/org/apache/solr/cloud/TlogReplayBufferedWhileIndexingTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/TlogReplayBufferedWhileIndexingTest.java b/solr/core/src/test/org/apache/solr/cloud/TlogReplayBufferedWhileIndexingTest.java
index 027c7fa..18eabc2 100644
--- a/solr/core/src/test/org/apache/solr/cloud/TlogReplayBufferedWhileIndexingTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/TlogReplayBufferedWhileIndexingTest.java
@@ -22,7 +22,6 @@ import java.util.List;
 
 import org.apache.lucene.util.LuceneTestCase.Nightly;
 import org.apache.solr.SolrTestCaseJ4.SuppressSSL;
-import org.apache.solr.SolrTestCaseJ4.SuppressObjectReleaseTracker;
 import org.apache.lucene.util.LuceneTestCase.Slow;
 import org.apache.solr.client.solrj.SolrServerException;
 import org.apache.solr.client.solrj.embedded.JettySolrRunner;
@@ -35,7 +34,6 @@ import org.junit.Test;
 @Slow
 @Nightly
 @SuppressSSL
-@SuppressObjectReleaseTracker(bugUrl="Testing purposes")
 public class TlogReplayBufferedWhileIndexingTest extends AbstractFullDistribZkTestBase {
 
   private List<StoppableIndexingThread> threads;
@@ -79,7 +77,7 @@ public class TlogReplayBufferedWhileIndexingTest extends AbstractFullDistribZkTe
     allJetty.addAll(jettys);
     allJetty.remove(shardToLeaderJetty.get("shard1").jetty);
     assert allJetty.size() == 1 : allJetty.size();
-    ChaosMonkey.stop(allJetty.get(0));
+    allJetty.get(0).stop();
     
     StoppableIndexingThread indexThread;
     for (int i = 0; i < numThreads; i++) {
@@ -92,7 +90,7 @@ public class TlogReplayBufferedWhileIndexingTest extends AbstractFullDistribZkTe
 
     Thread.sleep(2000);
     
-    ChaosMonkey.start(allJetty.get(0));
+    allJetty.get(0).start();
     
     Thread.sleep(45000);
   

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/eb652b84/solr/core/src/test/org/apache/solr/cloud/VMParamsZkACLAndCredentialsProvidersTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/VMParamsZkACLAndCredentialsProvidersTest.java b/solr/core/src/test/org/apache/solr/cloud/VMParamsZkACLAndCredentialsProvidersTest.java
index 95422fa..36fb989 100644
--- a/solr/core/src/test/org/apache/solr/cloud/VMParamsZkACLAndCredentialsProvidersTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/VMParamsZkACLAndCredentialsProvidersTest.java
@@ -63,7 +63,7 @@ public class VMParamsZkACLAndCredentialsProvidersTest extends SolrTestCaseJ4 {
         + "zookeeper/server1/data";
     log.info("ZooKeeper dataDir:" + zkDir);
     zkServer = new ZkTestServer(zkDir);
-    zkServer.run();
+    zkServer.run(false);
     
     System.setProperty("zkHost", zkServer.getZkAddress());
     
@@ -194,7 +194,10 @@ public class VMParamsZkACLAndCredentialsProvidersTest extends SolrTestCaseJ4 {
         zkClient.delete(path + "/subnode", -1, false);
       }
     } catch (NoAuthException nae) {
-      if (create) fail("No NoAuthException expected");
+      if (create) {
+        nae.printStackTrace();
+        fail("No NoAuthException expected");
+      }
       // expected
     }
     

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/eb652b84/solr/core/src/test/org/apache/solr/cloud/ZkControllerTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/ZkControllerTest.java b/solr/core/src/test/org/apache/solr/cloud/ZkControllerTest.java
index 5578452..45c4812 100644
--- a/solr/core/src/test/org/apache/solr/cloud/ZkControllerTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/ZkControllerTest.java
@@ -93,9 +93,6 @@ public class ZkControllerTest extends SolrTestCaseJ4 {
     try {
       server.run();
 
-      AbstractZkTestCase.tryCleanSolrZkNode(server.getZkHost());
-      AbstractZkTestCase.makeSolrZkNode(server.getZkHost());
-
       try (SolrZkClient client = new SolrZkClient(server.getZkAddress(), TIMEOUT)) {
 
         ZkController.createClusterZkNodes(client);
@@ -176,9 +173,6 @@ public class ZkControllerTest extends SolrTestCaseJ4 {
     try {
       server.run();
 
-      AbstractZkTestCase.tryCleanSolrZkNode(server.getZkHost());
-      AbstractZkTestCase.makeSolrZkNode(server.getZkHost());
-
       SolrZkClient zkClient = new SolrZkClient(server.getZkAddress(), TIMEOUT);
       String actualConfigName = "firstConfig";
 
@@ -228,9 +222,6 @@ public class ZkControllerTest extends SolrTestCaseJ4 {
     try {
       server.run();
 
-      AbstractZkTestCase.tryCleanSolrZkNode(server.getZkHost());
-      AbstractZkTestCase.makeSolrZkNode(server.getZkHost());
-
       cc = getCoreContainer();
       ZkController zkController = null;
 
@@ -282,9 +273,6 @@ public class ZkControllerTest extends SolrTestCaseJ4 {
     try {
       server.run();
 
-      AbstractZkTestCase.tryCleanSolrZkNode(server.getZkHost());
-      AbstractZkTestCase.makeSolrZkNode(server.getZkHost());
-
       cc = new MockCoreContainer()  {
         @Override
         public List<CoreDescriptor> getCoreDescriptors() {
@@ -336,8 +324,8 @@ public class ZkControllerTest extends SolrTestCaseJ4 {
         zkController.getZkStateReader().forciblyRefreshAllClusterStateSlow();
 
         long now = System.nanoTime();
-        long timeout = now + TimeUnit.NANOSECONDS.convert(ZkController.WAIT_DOWN_STATES_TIMEOUT_SECONDS, TimeUnit.SECONDS);
-        zkController.publishAndWaitForDownStates();
+        long timeout = now + TimeUnit.NANOSECONDS.convert(5, TimeUnit.SECONDS);
+        zkController.publishAndWaitForDownStates(5);
         assertTrue("The ZkController.publishAndWaitForDownStates should have timed out but it didn't", System.nanoTime() >= timeout);
       } finally {
         if (zkController != null)

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/eb652b84/solr/core/src/test/org/apache/solr/cloud/ZkFailoverTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/ZkFailoverTest.java b/solr/core/src/test/org/apache/solr/cloud/ZkFailoverTest.java
index 42d99f8..39f1810 100644
--- a/solr/core/src/test/org/apache/solr/cloud/ZkFailoverTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/ZkFailoverTest.java
@@ -40,16 +40,22 @@ public class ZkFailoverTest extends SolrCloudTestCase {
   }
 
   @AfterClass
-  public static void cleanUp() {
+  public static void cleanUp() throws Exception {
     System.clearProperty("waitForZk");
+
+    for (int i = 0; i < cluster.getJettySolrRunners().size(); i++) {
+      final JettySolrRunner runner = cluster.getJettySolrRunner(i);
+      runner.stop();
+    }
   }
 
   public void testRestartZkWhenClusterDown() throws Exception {
     String coll = "coll1";
     CollectionAdminRequest.createCollection(coll, 2, 1).process(cluster.getSolrClient());
+    cluster.waitForActiveCollection(coll, 2, 2);
     cluster.getSolrClient().add(coll, new SolrInputDocument("id", "1"));
     for (JettySolrRunner runner : cluster.getJettySolrRunners()) {
-      ChaosMonkey.stop(runner);
+      runner.stop();
     }
     ZkTestServer zkTestServer = cluster.getZkServer();
     zkTestServer.shutdown();
@@ -58,7 +64,7 @@ public class ZkFailoverTest extends SolrCloudTestCase {
       final JettySolrRunner runner = cluster.getJettySolrRunner(i);
       threads[i] = new Thread(() -> {
         try {
-          ChaosMonkey.start(runner);
+          runner.start();
         } catch (Exception e) {
           e.printStackTrace();
         }
@@ -67,12 +73,12 @@ public class ZkFailoverTest extends SolrCloudTestCase {
     }
     Thread.sleep(5000);
     zkTestServer = new ZkTestServer(zkTestServer.getZkDir(), zkTestServer.getPort());
-    zkTestServer.run();
+    zkTestServer.run(false);
     for (Thread thread : threads) {
       thread.join();
     }
     waitForLiveNodes(2);
-    waitForState("Timeout waiting for " + coll, coll, clusterShape(2, 1));
+    waitForState("Timeout waiting for " + coll, coll, clusterShape(2, 2));
     QueryResponse rsp = new QueryRequest(new SolrQuery("*:*")).process(cluster.getSolrClient(), coll);
     assertEquals(1, rsp.getResults().getNumFound());
     zkTestServer.shutdown();

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/eb652b84/solr/core/src/test/org/apache/solr/cloud/ZkSolrClientTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/ZkSolrClientTest.java b/solr/core/src/test/org/apache/solr/cloud/ZkSolrClientTest.java
index 1376fe8..1e9793d 100644
--- a/solr/core/src/test/org/apache/solr/cloud/ZkSolrClientTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/ZkSolrClientTest.java
@@ -21,7 +21,7 @@ import java.util.Set;
 import java.util.concurrent.CountDownLatch;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicInteger;
-import junit.framework.Assert;
+
 import org.apache.solr.SolrTestCaseJ4;
 import org.apache.solr.common.cloud.SolrZkClient;
 import org.apache.solr.common.cloud.ZkCmdExecutor;
@@ -54,9 +54,6 @@ public class ZkSolrClientTest extends SolrTestCaseJ4 {
       server = new ZkTestServer(zkDir);
       server.run();
 
-      AbstractZkTestCase.tryCleanSolrZkNode(server.getZkHost());
-      if (makeRoot) AbstractZkTestCase.makeSolrZkNode(server.getZkHost());
-
       zkClient = new SolrZkClient(server.getZkAddress(), AbstractZkTestCase.TIMEOUT);
     }
 
@@ -110,46 +107,59 @@ public class ZkSolrClientTest extends SolrTestCaseJ4 {
   public void testReconnect() throws Exception {
     String zkDir = createTempDir("zkData").toFile().getAbsolutePath();
     ZkTestServer server = null;
-    SolrZkClient zkClient = null;
-    try {
-      server = new ZkTestServer(zkDir);
-      server.run();
-      AbstractZkTestCase.tryCleanSolrZkNode(server.getZkHost());
-      AbstractZkTestCase.makeSolrZkNode(server.getZkHost());
+    server = new ZkTestServer(zkDir);
+    server.run();
+    try (SolrZkClient zkClient = new SolrZkClient(server.getZkAddress(), AbstractZkTestCase.TIMEOUT);) {
 
-      zkClient = new SolrZkClient(server.getZkAddress(), AbstractZkTestCase.TIMEOUT);
       String shardsPath = "/collections/collection1/shards";
       zkClient.makePath(shardsPath, false, true);
 
-      zkClient.makePath("collections/collection1", false, true);
       int zkServerPort = server.getPort();
       // this tests disconnect state
       server.shutdown();
 
       Thread.sleep(80);
 
+      Thread thread = new Thread() {
+        public void run() {
+          try {
+            zkClient.makePath("collections/collection2", false);
+           // Assert.fail("Server should be down here");
+          } catch (KeeperException | InterruptedException e) {
 
-      try {
-        zkClient.makePath("collections/collection2", false);
-        Assert.fail("Server should be down here");
-      } catch (KeeperException.ConnectionLossException e) {
+          }
+        }
+      };
 
-      }
+      thread.start();
 
       // bring server back up
       server = new ZkTestServer(zkDir, zkServerPort);
-      server.run();
+      server.run(false);
 
       // TODO: can we do better?
       // wait for reconnect
       Thread.sleep(600);
 
-      try {
-        zkClient.makePath("collections/collection3", true);
-      } catch (KeeperException.ConnectionLossException e) {
-        Thread.sleep(5000); // try again in a bit
-        zkClient.makePath("collections/collection3", true);
-      }
+      Thread thread2 = new Thread() {
+        public void run() {
+          try {
+
+            zkClient.makePath("collections/collection3", true);
+
+          } catch (KeeperException e) {
+            throw new RuntimeException(e);
+          } catch (InterruptedException e) {
+            throw new RuntimeException(e);
+          }
+        }
+      };
+
+      thread2.start();
+
+      thread.join();
+      
+      thread2.join();
 
       assertNotNull(zkClient.exists("/collections/collection3", null, true));
       assertNotNull(zkClient.exists("/collections/collection1", null, true));
@@ -181,9 +191,6 @@ public class ZkSolrClientTest extends SolrTestCaseJ4 {
 
     } finally {
 
-      if (zkClient != null) {
-        zkClient.close();
-      }
       if (server != null) {
         server.shutdown();
       }
@@ -197,8 +204,6 @@ public class ZkSolrClientTest extends SolrTestCaseJ4 {
     try {
       server = new ZkTestServer(zkDir);
       server.run();
-      AbstractZkTestCase.tryCleanSolrZkNode(server.getZkHost());
-      AbstractZkTestCase.makeSolrZkNode(server.getZkHost());
 
       final int timeout = random().nextInt(10000) + 5000;
       

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/eb652b84/solr/core/src/test/org/apache/solr/cloud/api/collections/AssignTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/api/collections/AssignTest.java b/solr/core/src/test/org/apache/solr/cloud/api/collections/AssignTest.java
index d5197ca..638496a 100644
--- a/solr/core/src/test/org/apache/solr/cloud/api/collections/AssignTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/api/collections/AssignTest.java
@@ -16,7 +16,13 @@
  */
 package org.apache.solr.cloud.api.collections;
 
-import java.io.IOException;
+import static org.mockito.ArgumentMatchers.any;
+import static org.mockito.ArgumentMatchers.anyBoolean;
+import static org.mockito.ArgumentMatchers.anyInt;
+import static org.mockito.ArgumentMatchers.anyString;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.HashMap;
@@ -42,18 +48,10 @@ import org.apache.solr.common.cloud.Slice;
 import org.apache.solr.common.cloud.SolrZkClient;
 import org.apache.solr.common.util.ExecutorUtil;
 import org.apache.solr.common.util.Utils;
-import org.apache.zookeeper.KeeperException;
 import org.junit.After;
 import org.junit.Before;
 import org.junit.Test;
 
-import static org.mockito.ArgumentMatchers.any;
-import static org.mockito.ArgumentMatchers.anyBoolean;
-import static org.mockito.ArgumentMatchers.anyInt;
-import static org.mockito.ArgumentMatchers.anyString;
-import static org.mockito.Mockito.mock;
-import static org.mockito.Mockito.when;
-
 public class AssignTest extends SolrTestCaseJ4 {
   
   @Override
@@ -109,14 +107,13 @@ public class AssignTest extends SolrTestCaseJ4 {
 
       try (SolrZkClient zkClient = new SolrZkClient(server.getZkAddress(), 10000)) {
         assertTrue(zkClient.isConnected());
-        zkClient.makePath("/", true);
         for (String c : collections) {
-          zkClient.makePath("/collections/"+c, true);
+          zkClient.makePath("/collections/" + c, true);
         }
         // TODO: fix this to be independent of ZK
         ZkDistribStateManager stateManager = new ZkDistribStateManager(zkClient);
         List<Future<?>> futures = new ArrayList<>();
-        for (int i = 0; i < 1000; i++) {
+        for (int i = 0; i < 73; i++) {
           futures.add(executor.submit(() -> {
             String collection = collections[random().nextInt(collections.length)];
             int id = Assign.incAndGetId(stateManager, collection, 0);
@@ -130,7 +127,7 @@ public class AssignTest extends SolrTestCaseJ4 {
           future.get();
         }
       }
-      assertEquals(1000, (long) collectionUniqueIds.values().stream()
+      assertEquals(73, (long) collectionUniqueIds.values().stream()
           .map(ConcurrentHashMap::size)
           .reduce((m1, m2) -> m1 + m2).get());
     } finally {
@@ -141,12 +138,11 @@ public class AssignTest extends SolrTestCaseJ4 {
 
 
   @Test
-  public void testBuildCoreName() throws IOException, InterruptedException, KeeperException {
+  public void testBuildCoreName() throws Exception {
     String zkDir = createTempDir("zkData").toFile().getAbsolutePath();
     ZkTestServer server = new ZkTestServer(zkDir);
     server.run();
     try (SolrZkClient zkClient = new SolrZkClient(server.getZkAddress(), 10000)) {
-      zkClient.makePath("/", true);
       // TODO: fix this to be independent of ZK
       ZkDistribStateManager stateManager = new ZkDistribStateManager(zkClient);
       Map<String, Slice> slices = new HashMap<>();

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/eb652b84/solr/core/src/test/org/apache/solr/cloud/api/collections/CollectionsAPIAsyncDistributedZkTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/api/collections/CollectionsAPIAsyncDistributedZkTest.java b/solr/core/src/test/org/apache/solr/cloud/api/collections/CollectionsAPIAsyncDistributedZkTest.java
index 7e939a0..b81b956 100644
--- a/solr/core/src/test/org/apache/solr/cloud/api/collections/CollectionsAPIAsyncDistributedZkTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/api/collections/CollectionsAPIAsyncDistributedZkTest.java
@@ -24,6 +24,7 @@ import java.util.concurrent.CountDownLatch;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicInteger;
+
 import org.apache.lucene.util.LuceneTestCase.Slow;
 import org.apache.lucene.util.TestUtil;
 import org.apache.solr.client.solrj.SolrClient;
@@ -39,9 +40,11 @@ import org.apache.solr.common.SolrException;
 import org.apache.solr.common.SolrInputDocument;
 import org.apache.solr.common.cloud.Replica;
 import org.apache.solr.common.cloud.Slice;
+import org.apache.solr.common.cloud.ZkStateReader;
 import org.apache.solr.common.util.ExecutorUtil;
 import org.apache.solr.util.DefaultSolrThreadFactory;
-import org.junit.BeforeClass;
+import org.junit.After;
+import org.junit.Before;
 import org.junit.Test;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -56,12 +59,19 @@ public class CollectionsAPIAsyncDistributedZkTest extends SolrCloudTestCase {
   
   private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
 
-  @BeforeClass
-  public static void setupCluster() throws Exception {
+  @Before
+  public void setupCluster() throws Exception {
+    // we recreate per test - they need to be isolated to be solid
     configureCluster(2)
         .addConfig("conf1", TEST_PATH().resolve("configsets").resolve("cloud-minimal").resolve("conf"))
         .configure();
   }
+  
+  @After
+  public void tearDown() throws Exception {
+    super.tearDown();
+    shutdownCluster();
+  }
 
   @Test
   public void testSolrJAPICalls() throws Exception {
@@ -88,10 +98,14 @@ public class CollectionsAPIAsyncDistributedZkTest extends SolrCloudTestCase {
   }
 
   @Test
-  //commented 9-Aug-2018  @BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 21-May-2018
-  @BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // added 20-Sep-2018
   public void testAsyncRequests() throws Exception {
-
+    boolean legacy = random().nextBoolean();
+    if (legacy) {
+      CollectionAdminRequest.setClusterProperty(ZkStateReader.LEGACY_CLOUD, "true").process(cluster.getSolrClient());
+    } else {
+      CollectionAdminRequest.setClusterProperty(ZkStateReader.LEGACY_CLOUD, "false").process(cluster.getSolrClient());
+    }
+    
     final String collection = "testAsyncOperations";
     final CloudSolrClient client = cluster.getSolrClient();
 
@@ -101,6 +115,9 @@ public class CollectionsAPIAsyncDistributedZkTest extends SolrCloudTestCase {
         .processAndWait(client, MAX_TIMEOUT_SECONDS);
     assertSame("CreateCollection task did not complete!", RequestStatusState.COMPLETED, state);
 
+    
+    cluster.waitForActiveCollection(collection, 1, 1);
+    
     //Add a few documents to shard1
     int numDocs = TestUtil.nextInt(random(), 10, 100);
     List<SolrInputDocument> docs = new ArrayList<>(numDocs);
@@ -125,6 +142,8 @@ public class CollectionsAPIAsyncDistributedZkTest extends SolrCloudTestCase {
         .processAndWait(client, MAX_TIMEOUT_SECONDS);
     assertSame("CreateShard did not complete", RequestStatusState.COMPLETED, state);
 
+    client.getZkStateReader().forceUpdateCollection(collection);
+    
     //Add a doc to shard2 to make sure shard2 was created properly
     SolrInputDocument doc = new SolrInputDocument();
     doc.addField("id", numDocs + 1);
@@ -143,14 +162,20 @@ public class CollectionsAPIAsyncDistributedZkTest extends SolrCloudTestCase {
     assertSame("AddReplica did not complete", RequestStatusState.COMPLETED, state);
 
     //cloudClient watch might take a couple of seconds to reflect it
-    Slice shard1 = client.getZkStateReader().getClusterState().getCollection(collection).getSlice("shard1");
-    int count = 0;
-    while (shard1.getReplicas().size() != 2) {
-      if (count++ > 1000) {
-        fail("2nd Replica not reflecting in the cluster state");
+    client.getZkStateReader().waitForState(collection, 20, TimeUnit.SECONDS, (n, c) -> {
+      if (c == null)
+        return false;
+      Slice slice = c.getSlice("shard1");
+      if (slice == null) {
+        return false;
       }
-      Thread.sleep(100);
-    }
+
+      if (slice.getReplicas().size() == 2) {
+        return true;
+      }
+
+      return false;
+    });
 
     state = CollectionAdminRequest.createAlias("myalias",collection)
         .processAndWait(client, MAX_TIMEOUT_SECONDS);
@@ -170,7 +195,8 @@ public class CollectionsAPIAsyncDistributedZkTest extends SolrCloudTestCase {
     } catch (SolrException e) {
       //expected
     }
-
+    
+    Slice shard1 = client.getZkStateReader().getClusterState().getCollection(collection).getSlice("shard1");
     Replica replica = shard1.getReplicas().iterator().next();
     for (String liveNode : client.getZkStateReader().getClusterState().getLiveNodes()) {
       if (!replica.getNodeName().equals(liveNode)) {
@@ -180,20 +206,23 @@ public class CollectionsAPIAsyncDistributedZkTest extends SolrCloudTestCase {
         break;
       }
     }
-
+    client.getZkStateReader().forceUpdateCollection(collection);
+    
     shard1 = client.getZkStateReader().getClusterState().getCollection(collection).getSlice("shard1");
     String replicaName = shard1.getReplicas().iterator().next().getName();
     state = CollectionAdminRequest.deleteReplica(collection, "shard1", replicaName)
       .processAndWait(client, MAX_TIMEOUT_SECONDS);
     assertSame("DeleteReplica did not complete", RequestStatusState.COMPLETED, state);
 
-    state = CollectionAdminRequest.deleteCollection(collection)
-        .processAndWait(client, MAX_TIMEOUT_SECONDS);
-    assertSame("DeleteCollection did not complete", RequestStatusState.COMPLETED, state);
+    if (!legacy) {
+      state = CollectionAdminRequest.deleteCollection(collection)
+          .processAndWait(client, MAX_TIMEOUT_SECONDS);
+      assertSame("DeleteCollection did not complete", RequestStatusState.COMPLETED, state);
+    }
   }
-  // commented 4-Sep-2018  @LuceneTestCase.BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 2-Aug-2018
-  @BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 14-Oct-2018
+
   public void testAsyncIdRaceCondition() throws Exception {
+
     SolrClient[] clients = new SolrClient[cluster.getJettySolrRunners().size()];
     int j = 0;
     for (JettySolrRunner r:cluster.getJettySolrRunners()) {

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/eb652b84/solr/core/src/test/org/apache/solr/cloud/api/collections/CollectionsAPIDistributedZkTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/api/collections/CollectionsAPIDistributedZkTest.java b/solr/core/src/test/org/apache/solr/cloud/api/collections/CollectionsAPIDistributedZkTest.java
index b562e5d..7b9585a 100644
--- a/solr/core/src/test/org/apache/solr/cloud/api/collections/CollectionsAPIDistributedZkTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/api/collections/CollectionsAPIDistributedZkTest.java
@@ -16,9 +16,9 @@
  */
 package org.apache.solr.cloud.api.collections;
 
-import javax.management.MBeanServer;
-import javax.management.MBeanServerFactory;
-import javax.management.ObjectName;
+import static org.apache.solr.common.cloud.ZkStateReader.CORE_NAME_PROP;
+import static org.apache.solr.common.cloud.ZkStateReader.REPLICATION_FACTOR;
+
 import java.io.IOException;
 import java.lang.invoke.MethodHandles;
 import java.lang.management.ManagementFactory;
@@ -38,7 +38,10 @@ import java.util.Optional;
 import java.util.Set;
 import java.util.concurrent.TimeUnit;
 
-import com.google.common.collect.ImmutableList;
+import javax.management.MBeanServer;
+import javax.management.MBeanServerFactory;
+import javax.management.ObjectName;
+
 import org.apache.commons.io.IOUtils;
 import org.apache.lucene.util.LuceneTestCase.Slow;
 import org.apache.lucene.util.TestUtil;
@@ -74,14 +77,13 @@ import org.apache.solr.core.SolrCore;
 import org.apache.solr.core.SolrInfoBean.Category;
 import org.apache.solr.util.TestInjection;
 import org.apache.solr.util.TimeOut;
+import org.junit.After;
 import org.junit.Before;
-import org.junit.BeforeClass;
 import org.junit.Test;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import static org.apache.solr.common.cloud.ZkStateReader.CORE_NAME_PROP;
-import static org.apache.solr.common.cloud.ZkStateReader.REPLICATION_FACTOR;
+import com.google.common.collect.ImmutableList;
 
 /**
  * Tests the Cloud Collections API.
@@ -90,16 +92,14 @@ import static org.apache.solr.common.cloud.ZkStateReader.REPLICATION_FACTOR;
 public class CollectionsAPIDistributedZkTest extends SolrCloudTestCase {
   private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
 
-  @BeforeClass
-  public static void beforeCollectionsAPIDistributedZkTest() {
+  @Before
+  public void setupCluster() throws Exception {
     // we don't want this test to have zk timeouts
-    System.setProperty("zkClientTimeout", "240000");
-    TestInjection.randomDelayInCoreCreation = "true:20";
+    System.setProperty("zkClientTimeout", "60000");
+    System.setProperty("createCollectionWaitTimeTillActive", "5");
+    TestInjection.randomDelayInCoreCreation = "true:5";
     System.setProperty("validateAfterInactivity", "200");
-  }
-
-  @BeforeClass
-  public static void setupCluster() throws Exception {
+    
     String solrXml = IOUtils.toString(CollectionsAPIDistributedZkTest.class.getResourceAsStream("/solr/solr-jmxreporter.xml"), "UTF-8");
     configureCluster(4)
         .addConfig("conf", configset("cloud-minimal"))
@@ -107,14 +107,11 @@ public class CollectionsAPIDistributedZkTest extends SolrCloudTestCase {
         .withSolrXml(solrXml)
         .configure();
   }
-
-  @Before
-  public void clearCluster() throws Exception {
-    try {
-      cluster.deleteAllCollections();
-    } finally {
-      System.clearProperty("zkClientTimeout");
-    }
+  
+  @After
+  public void tearDownCluster() throws Exception {
+    shutdownCluster();
+    System.clearProperty("createCollectionWaitTimeTillActive");
   }
 
   @Test
@@ -427,6 +424,14 @@ public class CollectionsAPIDistributedZkTest extends SolrCloudTestCase {
     // create new collections rapid fire
     int cnt = random().nextInt(TEST_NIGHTLY ? 3 : 1) + 1;
     CollectionAdminRequest.Create[] createRequests = new CollectionAdminRequest.Create[cnt];
+    
+    class Coll {
+      String name;
+      int numShards;
+      int replicationFactor;
+    }
+    
+    List<Coll> colls = new ArrayList<>();
 
     for (int i = 0; i < cnt; i++) {
 
@@ -438,25 +443,30 @@ public class CollectionsAPIDistributedZkTest extends SolrCloudTestCase {
           = CollectionAdminRequest.createCollection("awhollynewcollection_" + i, "conf2", numShards, replicationFactor)
           .setMaxShardsPerNode(maxShardsPerNode);
       createRequests[i].processAsync(cluster.getSolrClient());
+      
+      Coll coll = new Coll();
+      coll.name = "awhollynewcollection_" + i;
+      coll.numShards = numShards;
+      coll.replicationFactor = replicationFactor;
+      colls.add(coll);
     }
 
-    for (int i = 0; i < cnt; i++) {
-      String collectionName = "awhollynewcollection_" + i;
-      final int j = i;
-      waitForState("Expected to see collection " + collectionName, collectionName,
-          (n, c) -> {
-            CollectionAdminRequest.Create req = createRequests[j];
-            return DocCollection.isFullyActive(n, c, req.getNumShards(), req.getReplicationFactor());
-          });
+    for (Coll coll : colls) {
+      cluster.waitForActiveCollection(coll.name, coll.numShards, coll.numShards * coll.replicationFactor);
     }
 
-    cluster.injectChaos(random());
+    waitForStable(cnt, createRequests);
 
     for (int i = 0; i < cluster.getJettySolrRunners().size(); i++) {
       checkInstanceDirs(cluster.getJettySolrRunner(i));
     }
-
+    
     String collectionName = createRequests[random().nextInt(createRequests.length)].getCollectionName();
+    
+    // TODO: we should not need this...beast test well when trying to fix
+    Thread.sleep(1000);
+    
+    cluster.getSolrClient().getZkStateReader().forciblyRefreshAllClusterStateSlow();
 
     new UpdateRequest()
         .add("id", "6")
@@ -482,6 +492,25 @@ public class CollectionsAPIDistributedZkTest extends SolrCloudTestCase {
     checkNoTwoShardsUseTheSameIndexDir();
   }
 
+  private void waitForStable(int cnt, CollectionAdminRequest.Create[] createRequests) throws InterruptedException {
+    for (int i = 0; i < cnt; i++) {
+      String collectionName = "awhollynewcollection_" + i;
+      final int j = i;
+      waitForState("Expected to see collection " + collectionName, collectionName,
+          (n, c) -> {
+            CollectionAdminRequest.Create req = createRequests[j];
+            return DocCollection.isFullyActive(n, c, req.getNumShards(), req.getReplicationFactor());
+          });
+      
+      ZkStateReader zkStateReader = cluster.getSolrClient().getZkStateReader();
+      // make sure we have leaders for each shard
+      for (int z = 1; z <= createRequests[j].getNumShards(); z++) {
+        zkStateReader.getLeaderRetry(collectionName, "shard" + z, 10000);
+      }
+      
+    }
+  }
+
   @Test
   public void testCollectionReload() throws Exception {
 
@@ -619,6 +648,7 @@ public class CollectionsAPIDistributedZkTest extends SolrCloudTestCase {
     CollectionAdminRequest.createCollection(collectionName, "conf", 2, 2)
         .setMaxShardsPerNode(4)
         .process(cluster.getSolrClient());
+    cluster.waitForActiveCollection(collectionName, 2, 4);
 
     ArrayList<String> nodeList
         = new ArrayList<>(cluster.getSolrClient().getZkStateReader().getClusterState().getLiveNodes());

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/eb652b84/solr/core/src/test/org/apache/solr/cloud/api/collections/HdfsCollectionsAPIDistributedZkTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/api/collections/HdfsCollectionsAPIDistributedZkTest.java b/solr/core/src/test/org/apache/solr/cloud/api/collections/HdfsCollectionsAPIDistributedZkTest.java
index ed962ec..20706ef 100644
--- a/solr/core/src/test/org/apache/solr/cloud/api/collections/HdfsCollectionsAPIDistributedZkTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/api/collections/HdfsCollectionsAPIDistributedZkTest.java
@@ -84,7 +84,6 @@ public class HdfsCollectionsAPIDistributedZkTest extends CollectionsAPIDistribut
 
   @Test
   public void moveReplicaTest() throws Exception {
-    cluster.waitForAllNodes(5000);
     String coll = "movereplicatest_coll";
 
     CloudSolrClient cloudClient = cluster.getSolrClient();
@@ -130,7 +129,7 @@ public class HdfsCollectionsAPIDistributedZkTest extends CollectionsAPIDistribut
     checkNumOfCores(cloudClient, replica.getNodeName(), 0);
     checkNumOfCores(cloudClient, targetNode, 2);
 
-    waitForState("Wait for recovery finish failed",coll, clusterShape(2,2));
+    waitForState("Wait for recovery finish failed",coll, clusterShape(2,4));
     slice = cloudClient.getZkStateReader().getClusterState().getCollection(coll).getSlice(slice.getName());
     boolean found = false;
     for (Replica newReplica : slice.getReplicas()) {

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/eb652b84/solr/core/src/test/org/apache/solr/cloud/api/collections/ShardSplitTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/api/collections/ShardSplitTest.java b/solr/core/src/test/org/apache/solr/cloud/api/collections/ShardSplitTest.java
index 0b474e5..6098ed8 100644
--- a/solr/core/src/test/org/apache/solr/cloud/api/collections/ShardSplitTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/api/collections/ShardSplitTest.java
@@ -28,6 +28,7 @@ import java.util.Set;
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.CountDownLatch;
 import java.util.concurrent.TimeUnit;
+import java.util.concurrent.TimeoutException;
 import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.concurrent.atomic.AtomicReference;
@@ -45,8 +46,8 @@ import org.apache.solr.client.solrj.response.CollectionAdminResponse;
 import org.apache.solr.client.solrj.response.QueryResponse;
 import org.apache.solr.client.solrj.response.RequestStatusState;
 import org.apache.solr.cloud.AbstractDistribZkTestBase;
-import org.apache.solr.cloud.AbstractFullDistribZkTestBase;
-import org.apache.solr.cloud.ChaosMonkey;
+import org.apache.solr.cloud.BasicDistributedZkTest;
+import org.apache.solr.cloud.SolrCloudTestCase;
 import org.apache.solr.cloud.StoppableIndexingThread;
 import org.apache.solr.common.SolrDocument;
 import org.apache.solr.common.cloud.ClusterState;
@@ -78,7 +79,7 @@ import static org.apache.solr.common.cloud.ZkStateReader.REPLICATION_FACTOR;
 
 @Slow
 @LogLevel("org.apache.solr.cloud.Overseer=DEBUG;org.apache.solr.cloud.overseer=DEBUG;org.apache.solr.cloud.api.collections=DEBUG;org.apache.solr.cloud.OverseerTaskProcessor=DEBUG;org.apache.solr.util.TestInjection=DEBUG")
-public class ShardSplitTest extends AbstractFullDistribZkTestBase {
+public class ShardSplitTest extends BasicDistributedZkTest {
 
   private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
 
@@ -96,7 +97,7 @@ public class ShardSplitTest extends AbstractFullDistribZkTestBase {
   }
 
   @Test
-  // 12-Jun-2018 @BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028")
+  @Nightly
   public void test() throws Exception {
 
     waitForThingsToLevelOut(15);
@@ -143,6 +144,9 @@ public class ShardSplitTest extends AbstractFullDistribZkTestBase {
     create.setMaxShardsPerNode(5); // some high number so we can create replicas without hindrance
     create.setCreateNodeSet(nodeName); // we want to create the leader on a fixed node so that we know which one to restart later
     create.process(cloudClient);
+    
+    cloudClient.waitForState(collectionName, 30, TimeUnit.SECONDS, SolrCloudTestCase.activeClusterShape(1, 1));
+    
     try (CloudSolrClient client = getCloudSolrClient(zkServer.getZkAddress(), true, cloudClient.getLbClient().getHttpClient())) {
       client.setDefaultCollection(collectionName);
       StoppableIndexingThread thread = new StoppableIndexingThread(controlClient, client, "i1", true);
@@ -185,12 +189,14 @@ public class ShardSplitTest extends AbstractFullDistribZkTestBase {
           int liveNodeCount = client.getZkStateReader().getClusterState().getLiveNodes().size();
 
           // restart the sub-shard leader node
+          String stoppedNodeName = null;
           boolean restarted = false;
           for (JettySolrRunner jetty : jettys) {
             int port = jetty.getBaseUrl().getPort();
             if (replica.getStr(BASE_URL_PROP).contains(":" + port))  {
-              ChaosMonkey.kill(jetty);
-              ChaosMonkey.start(jetty);
+              stoppedNodeName = jetty.getNodeName();
+              jetty.stop();
+              jetty.start();
               restarted = true;
               break;
             }
@@ -199,6 +205,8 @@ public class ShardSplitTest extends AbstractFullDistribZkTestBase {
             // sanity check
             fail("We could not find a jetty to kill for replica: " + replica.getCoreUrl());
           }
+          
+          cloudClient.getZkStateReader().waitForLiveNodes(30, TimeUnit.SECONDS, SolrCloudTestCase.containsLiveNode(stoppedNodeName));
 
           // add a new replica for the sub-shard
           CollectionAdminRequest.AddReplica addReplica = CollectionAdminRequest.addReplicaToShard(collectionName, SHARD1_0);
@@ -208,6 +216,9 @@ public class ShardSplitTest extends AbstractFullDistribZkTestBase {
           try (HttpSolrClient control = new HttpSolrClient.Builder(control_collection).withHttpClient(client.getLbClient().getHttpClient()).build())  {
             state = addReplica.processAndWait(control, 30);
           }
+          
+          cloudClient.waitForState(collectionName, 30, TimeUnit.SECONDS, SolrCloudTestCase.activeClusterShape(2, 4));
+          
           if (state == RequestStatusState.COMPLETED)  {
             CountDownLatch newReplicaLatch = new CountDownLatch(1);
             client.getZkStateReader().registerCollectionStateWatcher(collectionName, (liveNodes, collectionState) -> {
@@ -319,6 +330,7 @@ public class ShardSplitTest extends AbstractFullDistribZkTestBase {
   }
 
   @Test
+  @Nightly
   public void testSplitAfterFailedSplit2() throws Exception {
     waitForThingsToLevelOut(15);
 
@@ -345,9 +357,12 @@ public class ShardSplitTest extends AbstractFullDistribZkTestBase {
   private void doSplitMixedReplicaTypes(SolrIndexSplitter.SplitMethod splitMethod) throws Exception {
     waitForThingsToLevelOut(15);
     String collectionName = "testSplitMixedReplicaTypes_" + splitMethod.toLower();
-    CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection(collectionName, "conf1", 1, 2, 2, 2);
+    CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection(collectionName, "conf1", 1, 2, 0, 2); // TODO tlog replicas disabled right now.
     create.setMaxShardsPerNode(5); // some high number so we can create replicas without hindrance
     create.process(cloudClient);
+    
+    cloudClient.waitForState(collectionName, 30, TimeUnit.SECONDS, SolrCloudTestCase.activeClusterShape(1, 4));
+    
     waitForRecoveriesToFinish(collectionName, false);
 
     for (int i = 0; i < 100; i++) {
@@ -360,6 +375,8 @@ public class ShardSplitTest extends AbstractFullDistribZkTestBase {
     splitShard.setSplitMethod(splitMethod.toLower());
     CollectionAdminResponse rsp = splitShard.process(cloudClient);
     waitForThingsToLevelOut(30);
+   
+    cloudClient.waitForState(collectionName, 30, TimeUnit.SECONDS, SolrCloudTestCase.activeClusterShape(2, 12));
 
     cloudClient.getZkStateReader().forceUpdateCollection(collectionName);
     ClusterState clusterState = cloudClient.getZkStateReader().getClusterState();
@@ -367,10 +384,10 @@ public class ShardSplitTest extends AbstractFullDistribZkTestBase {
     log.info("coll: " + coll);
 
     // verify the original shard
-    verifyShard(coll, SHARD1, Slice.State.INACTIVE, 2, 2, 2);
+    verifyShard(coll, SHARD1, Slice.State.INACTIVE, 2, 0, 2);
     // verify new sub-shards
-    verifyShard(coll, SHARD1_0, Slice.State.ACTIVE, 2, 2, 2);
-    verifyShard(coll, SHARD1_1, Slice.State.ACTIVE, 2, 2, 2);
+    verifyShard(coll, SHARD1_0, Slice.State.ACTIVE, 2, 0, 2);
+    verifyShard(coll, SHARD1_1, Slice.State.ACTIVE, 2, 0, 2);
   }
 
   private void verifyShard(DocCollection coll, String shard, Slice.State expectedState, int numNrt, int numTlog, int numPull) throws Exception {
@@ -392,6 +409,7 @@ public class ShardSplitTest extends AbstractFullDistribZkTestBase {
   }
 
   @Test
+  @Nightly
   public void testSplitWithChaosMonkey() throws Exception {
     waitForThingsToLevelOut(15);
 
@@ -435,7 +453,7 @@ public class ShardSplitTest extends AbstractFullDistribZkTestBase {
             CloudJettyRunner cjetty = shardToLeaderJetty.get(SHARD1);
             try {
               Thread.sleep(1000 + random().nextInt(500));
-              ChaosMonkey.kill(cjetty);
+              cjetty.jetty.stop();
               stop.set(true);
               return true;
             } catch (Exception e) {
@@ -478,7 +496,7 @@ public class ShardSplitTest extends AbstractFullDistribZkTestBase {
 
       CloudJettyRunner cjetty = shardToLeaderJetty.get(SHARD1);
       log.info("Starting shard1 leader jetty at port {}", cjetty.jetty.getLocalPort());
-      ChaosMonkey.start(cjetty.jetty);
+      cjetty.jetty.start();
       cloudClient.getZkStateReader().forceUpdateCollection(AbstractDistribZkTestBase.DEFAULT_COLLECTION);
       log.info("Current collection state: {}", printClusterStateInfo(AbstractDistribZkTestBase.DEFAULT_COLLECTION));
 
@@ -551,6 +569,9 @@ public class ShardSplitTest extends AbstractFullDistribZkTestBase {
     CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection(collectionName, "conf1", 1, 2);
     create.setMaxShardsPerNode(5); // some high number so we can create replicas without hindrance
     create.process(cloudClient);
+    
+    cloudClient.waitForState(collectionName, 30, TimeUnit.SECONDS, SolrCloudTestCase.activeClusterShape(1, 2));
+    
     waitForRecoveriesToFinish(collectionName, false);
 
     TestInjection.splitLatch = new CountDownLatch(1); // simulate a long split operation
@@ -625,8 +646,15 @@ public class ShardSplitTest extends AbstractFullDistribZkTestBase {
     String collectionName = "shardSplitWithRule_" + splitMethod.toLower();
     CollectionAdminRequest.Create createRequest = CollectionAdminRequest.createCollection(collectionName, "conf1", 1, 2)
         .setRule("shard:*,replica:<2,node:*");
+
     CollectionAdminResponse response = createRequest.process(cloudClient);
     assertEquals(0, response.getStatus());
+    
+    try {
+      cloudClient.waitForState(collectionName, 30, TimeUnit.SECONDS, SolrCloudTestCase.activeClusterShape(1, 2));
+    } catch (TimeoutException e) {
+      throw new RuntimeException("Timeout waiting for 1 shard and 2 replicas.", e);
+    }
 
     CollectionAdminRequest.SplitShard splitShardRequest = CollectionAdminRequest.splitShard(collectionName)
         .setShardName("shard1").setSplitMethod(splitMethod.toLower());
@@ -784,7 +812,7 @@ public class ShardSplitTest extends AbstractFullDistribZkTestBase {
           OverseerCollectionMessageHandler.NUM_SLICES, numShards,
           "router.field", shard_fld);
 
-      createCollection(collectionInfos, collectionName,props,client);
+      createCollection(collectionInfos, collectionName, props, client);
     }
 
     List<Integer> list = collectionInfos.get(collectionName);

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/eb652b84/solr/core/src/test/org/apache/solr/cloud/api/collections/SimpleCollectionCreateDeleteTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/api/collections/SimpleCollectionCreateDeleteTest.java b/solr/core/src/test/org/apache/solr/cloud/api/collections/SimpleCollectionCreateDeleteTest.java
index 0b75bd5..971bb81 100644
--- a/solr/core/src/test/org/apache/solr/cloud/api/collections/SimpleCollectionCreateDeleteTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/api/collections/SimpleCollectionCreateDeleteTest.java
@@ -16,11 +16,20 @@
  */
 package org.apache.solr.cloud.api.collections;
 
+import java.util.Collection;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.TimeoutException;
+
+import org.apache.solr.client.solrj.embedded.JettySolrRunner;
 import org.apache.solr.client.solrj.request.CollectionAdminRequest;
 import org.apache.solr.cloud.AbstractFullDistribZkTestBase;
 import org.apache.solr.cloud.OverseerCollectionConfigSetProcessor;
 import org.apache.solr.common.cloud.ZkStateReader;
 import org.apache.solr.common.util.NamedList;
+import org.apache.solr.common.util.TimeSource;
+import org.apache.solr.core.CoreDescriptor;
+import org.apache.solr.core.SolrCore;
+import org.apache.solr.util.TimeOut;
 import org.junit.Test;
 
 public class SimpleCollectionCreateDeleteTest extends AbstractFullDistribZkTestBase {
@@ -54,6 +63,32 @@ public class SimpleCollectionCreateDeleteTest extends AbstractFullDistribZkTestB
       cloudClient.request(delete);
 
       assertFalse(cloudClient.getZkStateReader().getZkClient().exists(ZkStateReader.COLLECTIONS_ZKNODE + "/" + collectionName, false));
+      
+      // currently, removing a collection does not wait for cores to be unloaded
+      TimeOut timeout = new TimeOut(30, TimeUnit.SECONDS, TimeSource.NANO_TIME);
+      while (true) {
+        
+        if( timeout.hasTimedOut() ) {
+          throw new TimeoutException("Timed out waiting for all collections to be fully removed.");
+        }
+        
+        boolean allContainersEmpty = true;
+        for(JettySolrRunner jetty : jettys) {
+          
+          Collection<SolrCore> cores = jetty.getCoreContainer().getCores();
+          for (SolrCore core : cores) {
+            CoreDescriptor cd = core.getCoreDescriptor();
+            if (cd != null) {
+              if (cd.getCloudDescriptor().getCollectionName().equals(collectionName)) {
+                allContainersEmpty = false;
+              }
+            }
+          }
+        }
+        if (allContainersEmpty) {
+          break;
+        }
+      }
 
       // create collection again on a node other than the overseer leader
       create = CollectionAdminRequest.createCollection(collectionName,1,1)

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/eb652b84/solr/core/src/test/org/apache/solr/cloud/api/collections/TestCollectionsAPIViaSolrCloudCluster.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/api/collections/TestCollectionsAPIViaSolrCloudCluster.java b/solr/core/src/test/org/apache/solr/cloud/api/collections/TestCollectionsAPIViaSolrCloudCluster.java
index 6ee616f..34355b7 100644
--- a/solr/core/src/test/org/apache/solr/cloud/api/collections/TestCollectionsAPIViaSolrCloudCluster.java
+++ b/solr/core/src/test/org/apache/solr/cloud/api/collections/TestCollectionsAPIViaSolrCloudCluster.java
@@ -88,13 +88,17 @@ public class TestCollectionsAPIViaSolrCloudCluster extends SolrCloudTestCase {
           .setCreateNodeSet(createNodeSet)
           .setProperties(collectionProperties)
           .process(cluster.getSolrClient());
+
+    }
+    
+    if (createNodeSet != null && createNodeSet.equals(OverseerCollectionMessageHandler.CREATE_NODE_SET_EMPTY)) {
+      cluster.waitForActiveCollection(collectionName, numShards, 0);
+    } else {
+      cluster.waitForActiveCollection(collectionName, numShards, numShards * numReplicas);
     }
-    AbstractDistribZkTestBase.waitForRecoveriesToFinish
-        (collectionName, cluster.getSolrClient().getZkStateReader(), true, true, 330);
   }
 
   @Test
-  @BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028")
   public void testCollectionCreateSearchDelete() throws Exception {
     final CloudSolrClient client = cluster.getSolrClient();
     final String collectionName = "testcollection";
@@ -108,11 +112,15 @@ public class TestCollectionsAPIViaSolrCloudCluster extends SolrCloudTestCase {
 
     // shut down a server
     JettySolrRunner stoppedServer = cluster.stopJettySolrRunner(0);
+    
+    cluster.waitForJettyToStop(stoppedServer);
+    
     assertTrue(stoppedServer.isStopped());
     assertEquals(nodeCount - 1, cluster.getJettySolrRunners().size());
 
     // create a server
     JettySolrRunner startedServer = cluster.startJettySolrRunner();
+    cluster.waitForAllNodes(30);
     assertTrue(startedServer.isRunning());
     assertEquals(nodeCount, cluster.getJettySolrRunners().size());
 
@@ -153,6 +161,7 @@ public class TestCollectionsAPIViaSolrCloudCluster extends SolrCloudTestCase {
 
     // re-create a server (to restore original nodeCount count)
     startedServer = cluster.startJettySolrRunner(jettyToStop);
+    cluster.waitForAllNodes(30);
     assertTrue(startedServer.isRunning());
     assertEquals(nodeCount, cluster.getJettySolrRunners().size());
 
@@ -162,6 +171,8 @@ public class TestCollectionsAPIViaSolrCloudCluster extends SolrCloudTestCase {
 
     // create it again
     createCollection(collectionName, null);
+    
+    cluster.waitForActiveCollection(collectionName, numShards, numShards * numReplicas);
 
     // check that there's no left-over state
     assertEquals(0, client.query(collectionName, new SolrQuery("*:*")).getResults().getNumFound());
@@ -289,7 +300,8 @@ public class TestCollectionsAPIViaSolrCloudCluster extends SolrCloudTestCase {
         assertTrue(jetty.isRunning());
       }
     }
-    AbstractDistribZkTestBase.waitForRecoveriesToFinish(collectionName, zkStateReader, true, true, 330);
+    cluster.waitForAllNodes(30);
+    cluster.waitForActiveCollection(collectionName, numShards, numShards * numReplicas);
 
     zkStateReader.forceUpdateCollection(collectionName);
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/eb652b84/solr/core/src/test/org/apache/solr/cloud/api/collections/TestHdfsCloudBackupRestore.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/api/collections/TestHdfsCloudBackupRestore.java b/solr/core/src/test/org/apache/solr/cloud/api/collections/TestHdfsCloudBackupRestore.java
index e81bc4b..4d9a30c 100644
--- a/solr/core/src/test/org/apache/solr/cloud/api/collections/TestHdfsCloudBackupRestore.java
+++ b/solr/core/src/test/org/apache/solr/cloud/api/collections/TestHdfsCloudBackupRestore.java
@@ -26,6 +26,8 @@ import java.util.HashMap;
 import java.util.Map;
 import java.util.Properties;
 
+import org.apache.lucene.util.LuceneTestCase.AwaitsFix;
+
 import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters;
 import org.apache.commons.io.IOUtils;
 import org.apache.hadoop.conf.Configuration;
@@ -61,8 +63,7 @@ import static org.apache.solr.core.backup.BackupManager.ZK_STATE_DIR;
 @ThreadLeakFilters(defaultFilters = true, filters = {
     BadHdfsThreadsFilter.class // hdfs currently leaks thread(s)
 })
-//05-Jul-2018  @LuceneTestCase.BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 04-May-2018
-//commented 23-AUG-2018  @LuceneTestCase.BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 2-Aug-2018
+@AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/SOLR-12866")
 public class TestHdfsCloudBackupRestore extends AbstractCloudBackupRestoreTestCase {
   public static final String SOLR_XML = "<solr>\n" +
       "\n" +

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/eb652b84/solr/core/src/test/org/apache/solr/cloud/api/collections/TestLocalFSCloudBackupRestore.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/api/collections/TestLocalFSCloudBackupRestore.java b/solr/core/src/test/org/apache/solr/cloud/api/collections/TestLocalFSCloudBackupRestore.java
index 83a6947..e697889 100644
--- a/solr/core/src/test/org/apache/solr/cloud/api/collections/TestLocalFSCloudBackupRestore.java
+++ b/solr/core/src/test/org/apache/solr/cloud/api/collections/TestLocalFSCloudBackupRestore.java
@@ -16,15 +16,16 @@
  */
 package org.apache.solr.cloud.api.collections;
 
+import org.apache.lucene.util.LuceneTestCase.AwaitsFix;
 import org.junit.BeforeClass;
 import org.junit.Test;
 
 /**
- * This class implements the tests for local file-system integration for Solr backup/restore capability.
- * Note that the Solr backup/restore still requires a "shared" file-system. Its just that in this case
- * such file-system would be exposed via local file-system API.
+ * This class implements the tests for local file-system integration for Solr backup/restore capability. Note that the
+ * Solr backup/restore still requires a "shared" file-system. Its just that in this case such file-system would be
+ * exposed via local file-system API.
  */
-//commented 9-Aug-2018 @LuceneTestCase.BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 2-Aug-2018
+@AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/SOLR-12866")
 public class TestLocalFSCloudBackupRestore extends AbstractCloudBackupRestoreTestCase {
   private static String backupLocation;
 
@@ -59,8 +60,7 @@ public class TestLocalFSCloudBackupRestore extends AbstractCloudBackupRestoreTes
 
   @Override
   @Test
-  //Commented 14-Oct-2018 @BadApple(bugUrl = "https://issues.apache.org/jira/browse/SOLR-12028") // added 09-Aug-2018
   public void test() throws Exception {
     super.test();
   }
-  }
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/eb652b84/solr/core/src/test/org/apache/solr/cloud/autoscaling/AutoAddReplicasIntegrationTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/AutoAddReplicasIntegrationTest.java b/solr/core/src/test/org/apache/solr/cloud/autoscaling/AutoAddReplicasIntegrationTest.java
index 3c40d8b..5ad5764 100644
--- a/solr/core/src/test/org/apache/solr/cloud/autoscaling/AutoAddReplicasIntegrationTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/autoscaling/AutoAddReplicasIntegrationTest.java
@@ -17,6 +17,8 @@
 
 package org.apache.solr.cloud.autoscaling;
 
+import static org.apache.solr.common.util.Utils.makeMap;
+
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
@@ -41,18 +43,17 @@ import org.apache.solr.common.params.SolrParams;
 import org.apache.solr.common.util.TimeSource;
 import org.apache.solr.util.LogLevel;
 import org.apache.solr.util.TimeOut;
-import org.junit.BeforeClass;
+import org.junit.After;
+import org.junit.Before;
 import org.junit.Test;
 
-import static org.apache.solr.common.util.Utils.makeMap;
-
 @LogLevel("org.apache.solr.cloud.autoscaling=DEBUG;org.apache.solr.client.solrj.cloud.autoscaling=DEBUG;org.apache.solr.cloud=DEBUG;org.apache.solr.cloud.Overseer=DEBUG;org.apache.solr.cloud.overseer=DEBUG;")
 public class AutoAddReplicasIntegrationTest extends SolrCloudTestCase {
   private static final String COLLECTION1 =  "testSimple1";
   private static final String COLLECTION2 =  "testSimple2";
 
-  @BeforeClass
-  public static void setupCluster() throws Exception {
+  @Before
+  public void setupCluster() throws Exception {
     configureCluster(3)
         .addConfig("conf", configset("cloud-minimal"))
         .withSolrXml(TEST_PATH().resolve("solr.xml"))
@@ -64,11 +65,15 @@ public class AutoAddReplicasIntegrationTest extends SolrCloudTestCase {
         .build()
         .process(cluster.getSolrClient());
   }
+  
+  @After
+  public void tearDown() throws Exception {
+    shutdownCluster();
+    super.tearDown();
+  }
 
   @Test
   // This apparently fails in both subclasses.
-  // 12-Jun-2018 @BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028")
-  // commented 15-Sep-2018 @LuceneTestCase.BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 2-Aug-2018
   public void testSimple() throws Exception {
     JettySolrRunner jetty1 = cluster.getJettySolrRunner(0);
     JettySolrRunner jetty2 = cluster.getJettySolrRunner(1);
@@ -97,25 +102,36 @@ public class AutoAddReplicasIntegrationTest extends SolrCloudTestCase {
     String lostNodeName = lostJetty.getNodeName();
     List<Replica> replacedHdfsReplicas = getReplacedSharedFsReplicas(COLLECTION1, zkStateReader, lostNodeName);
     lostJetty.stop();
+    
+    cluster.waitForJettyToStop(lostJetty);
+    
     waitForNodeLeave(lostNodeName);
+    
     // ensure that 2 shards have 2 active replicas and only 4 replicas in total
     // i.e. old replicas have been deleted.
     // todo remove the condition for total replicas == 4 after SOLR-11591 is fixed
-    waitForState("Waiting for collection " + COLLECTION1, COLLECTION1, (liveNodes, collectionState) -> clusterShape(2, 2).matches(liveNodes, collectionState)
-        && collectionState.getReplicas().size() == 4);
+    waitForState("Waiting for collection " + COLLECTION1, COLLECTION1, (liveNodes, collectionState) -> clusterShape(2, 4).matches(liveNodes, collectionState)
+        && collectionState.getReplicas().size() == 4, 90, TimeUnit.SECONDS);
     checkSharedFsReplicasMovedCorrectly(replacedHdfsReplicas, zkStateReader, COLLECTION1);
     lostJetty.start();
+    
+    cluster.waitForAllNodes(30);
+    
     assertTrue("Timeout waiting for all live and active", ClusterStateUtil.waitForAllActiveAndLiveReplicas(cluster.getSolrClient().getZkStateReader(), 90000));
 
     // check cluster property is considered
     disableAutoAddReplicasInCluster();
     lostNodeName = jetty3.getNodeName();
     jetty3.stop();
+    
+    cluster.waitForJettyToStop(jetty3);
+    
     waitForNodeLeave(lostNodeName);
-    waitForState("Waiting for collection " + COLLECTION1, COLLECTION1, clusterShape(2, 1));
-    jetty3.start();
+    
     waitForState("Waiting for collection " + COLLECTION1, COLLECTION1, clusterShape(2, 2));
-    waitForState("Waiting for collection " + COLLECTION2, COLLECTION2, clusterShape(2, 2));
+    jetty3.start();
+    waitForState("Waiting for collection " + COLLECTION1, COLLECTION1, clusterShape(2, 4));
+    waitForState("Waiting for collection " + COLLECTION2, COLLECTION2, clusterShape(2, 4));
     enableAutoAddReplicasInCluster();
 
 
@@ -132,10 +148,14 @@ public class AutoAddReplicasIntegrationTest extends SolrCloudTestCase {
 
     lostNodeName = jetty2.getNodeName();
     replacedHdfsReplicas = getReplacedSharedFsReplicas(COLLECTION2, zkStateReader, lostNodeName);
+    
     jetty2.stop();
+    
+    cluster.waitForJettyToStop(jetty2);
+    
     waitForNodeLeave(lostNodeName);
-    waitForState("Waiting for collection " + COLLECTION1, COLLECTION1, clusterShape(2, 2));
-    waitForState("Waiting for collection " + COLLECTION2, COLLECTION2, clusterShape(2, 2));
+    waitForState("Waiting for collection " + COLLECTION1, COLLECTION1, clusterShape(2, 4), 45, TimeUnit.SECONDS);
+    waitForState("Waiting for collection " + COLLECTION2, COLLECTION2, clusterShape(2, 4), 45, TimeUnit.SECONDS);
     checkSharedFsReplicasMovedCorrectly(replacedHdfsReplicas, zkStateReader, COLLECTION2);
 
     // overseer failover test..

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/eb652b84/solr/core/src/test/org/apache/solr/cloud/autoscaling/AutoAddReplicasPlanActionTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/AutoAddReplicasPlanActionTest.java b/solr/core/src/test/org/apache/solr/cloud/autoscaling/AutoAddReplicasPlanActionTest.java
index 31bd2fd..1c6d4a8 100644
--- a/solr/core/src/test/org/apache/solr/cloud/autoscaling/AutoAddReplicasPlanActionTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/autoscaling/AutoAddReplicasPlanActionTest.java
@@ -17,38 +17,49 @@
 
 package org.apache.solr.cloud.autoscaling;
 
+import static org.apache.solr.cloud.autoscaling.AutoScalingHandlerTest.createAutoScalingRequest;
+
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.Iterator;
 import java.util.List;
+import java.util.Optional;
 import java.util.concurrent.TimeUnit;
 import java.util.stream.Collectors;
 
 import org.apache.solr.client.solrj.SolrRequest;
 import org.apache.solr.client.solrj.cloud.autoscaling.TriggerEventType;
 import org.apache.solr.client.solrj.embedded.JettySolrRunner;
+import org.apache.solr.client.solrj.impl.CloudSolrClient;
 import org.apache.solr.client.solrj.request.CollectionAdminRequest;
 import org.apache.solr.client.solrj.request.V2Request;
 import org.apache.solr.cloud.CloudDescriptor;
 import org.apache.solr.cloud.SolrCloudTestCase;
+import org.apache.solr.common.SolrException;
 import org.apache.solr.common.cloud.ClusterStateUtil;
 import org.apache.solr.common.cloud.ZkStateReader;
 import org.apache.solr.common.params.CollectionParams;
 import org.apache.solr.common.params.ModifiableSolrParams;
 import org.apache.solr.common.params.SolrParams;
-import org.apache.solr.common.util.NamedList;
+import org.apache.solr.common.util.NamedList; 
 import org.apache.solr.common.util.SuppressForbidden;
-import org.apache.solr.common.util.TimeSource;
-import org.apache.solr.util.TimeOut;
+import org.junit.After;
+import org.junit.Before;
 import org.junit.BeforeClass;
 import org.junit.Test;
 
-import static org.apache.solr.cloud.autoscaling.AutoScalingHandlerTest.createAutoScalingRequest;
-
 public class AutoAddReplicasPlanActionTest extends SolrCloudTestCase{
-
+  
   @BeforeClass
   public static void setupCluster() throws Exception {
+    System.setProperty("solr.httpclient.retries", "4");
+    System.setProperty("solr.retries.on.forward", "1");
+    System.setProperty("solr.retries.to.followers", "1"); 
+
+  }
+  
+  @Before
+  public void beforeTest() throws Exception {
     configureCluster(3)
         .addConfig("conf", configset("cloud-minimal"))
         .configure();
@@ -59,6 +70,11 @@ public class AutoAddReplicasPlanActionTest extends SolrCloudTestCase{
         .build()
         .process(cluster.getSolrClient());
   }
+  
+  @After 
+  public void afterTest() throws Exception {
+    shutdownCluster();
+  }
 
   @Test
   @BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028")
@@ -85,7 +101,11 @@ public class AutoAddReplicasPlanActionTest extends SolrCloudTestCase{
         .setAutoAddReplicas(false)
         .setMaxShardsPerNode(3)
         .process(cluster.getSolrClient());
-
+    
+    cluster.waitForActiveCollection(collection1, 2, 4);
+    cluster.waitForActiveCollection(collection2, 1, 2);
+    cluster.waitForActiveCollection("testSimple3", 3, 3);
+    
     // we remove the implicit created trigger, so the replicas won't be moved
     String removeTriggerCommand = "{" +
         "'remove-trigger' : {" +
@@ -102,34 +122,71 @@ public class AutoAddReplicasPlanActionTest extends SolrCloudTestCase{
     List<CloudDescriptor> cloudDescriptors = lostJetty.getCoreContainer().getCores().stream()
         .map(solrCore -> solrCore.getCoreDescriptor().getCloudDescriptor())
         .collect(Collectors.toList());
+    
+    ZkStateReader reader = cluster.getSolrClient().getZkStateReader();
+
     lostJetty.stop();
-    waitForNodeLeave(lostNodeName);
+    
+    cluster.waitForJettyToStop(lostJetty);
+
+    reader.waitForLiveNodes(30, TimeUnit.SECONDS, missingLiveNode(lostNodeName));
+
 
     List<SolrRequest> operations = getOperations(jetty3, lostNodeName);
     assertOperations(collection1, operations, lostNodeName, cloudDescriptors,  null);
 
     lostJetty.start();
-    ClusterStateUtil.waitForAllActiveAndLiveReplicas(cluster.getSolrClient().getZkStateReader(), 30000);
+    cluster.waitForAllNodes(30);
+    
+    cluster.waitForActiveCollection(collection1, 2, 4);
+    cluster.waitForActiveCollection(collection2, 1, 2);
+    cluster.waitForActiveCollection("testSimple3", 3, 3);
+    
+    assertTrue("Timeout waiting for all live and active", ClusterStateUtil.waitForAllActiveAndLiveReplicas(cluster.getSolrClient().getZkStateReader(), 30000));
+    
     String setClusterPreferencesCommand = "{" +
         "'set-cluster-preferences': [" +
         "{'minimize': 'cores','precision': 0}]" +
         "}";
     req = createAutoScalingRequest(SolrRequest.METHOD.POST, setClusterPreferencesCommand);
-    response = cluster.getSolrClient().request(req);
+    
+    // you can hit a stale connection from pool when restarting jetty
+    try (CloudSolrClient cloudClient = new CloudSolrClient.Builder(Collections.singletonList(cluster.getZkServer().getZkAddress()),
+        Optional.empty())
+            .withSocketTimeout(45000).withConnectionTimeout(15000).build()) {
+      response = cloudClient.request(req);
+    }
+
     assertEquals(response.get("result").toString(), "success");
 
     lostJetty = random().nextBoolean()? jetty1 : jetty2;
-    lostNodeName = lostJetty.getNodeName();
+    String lostNodeName2 = lostJetty.getNodeName();
     cloudDescriptors = lostJetty.getCoreContainer().getCores().stream()
         .map(solrCore -> solrCore.getCoreDescriptor().getCloudDescriptor())
         .collect(Collectors.toList());
+    
+
+    
     lostJetty.stop();
-    waitForNodeLeave(lostNodeName);
+   
+    reader.waitForLiveNodes(30, TimeUnit.SECONDS, missingLiveNode(lostNodeName2));
 
-    operations = getOperations(jetty3, lostNodeName);
-    assertOperations(collection1, operations, lostNodeName, cloudDescriptors, jetty3);
+    try {
+      operations = getOperations(jetty3, lostNodeName2);
+    } catch (SolrException e) {
+      // we might get a stale connection from the pool after jetty restarts
+      operations = getOperations(jetty3, lostNodeName2);
+    }
+    
+    assertOperations(collection1, operations, lostNodeName2, cloudDescriptors, jetty3);
 
     lostJetty.start();
+    cluster.waitForAllNodes(30);
+    
+    cluster.waitForActiveCollection(collection1, 2, 4);
+    cluster.waitForActiveCollection(collection2, 1, 2);
+    cluster.waitForActiveCollection("testSimple3", 3, 3);
+    
     assertTrue("Timeout waiting for all live and active", ClusterStateUtil.waitForAllActiveAndLiveReplicas(cluster.getSolrClient().getZkStateReader(), 30000));
 
     new CollectionAdminRequest.AsyncCollectionAdminRequest(CollectionParams.CollectionAction.MODIFYCOLLECTION) {
@@ -142,22 +199,16 @@ public class AutoAddReplicasPlanActionTest extends SolrCloudTestCase{
       }
     }.process(cluster.getSolrClient());
     lostJetty = jetty1;
-    lostNodeName = lostJetty.getNodeName();
+    String lostNodeName3 = lostJetty.getNodeName();
+    
     lostJetty.stop();
-    waitForNodeLeave(lostNodeName);
-    operations = getOperations(jetty3, lostNodeName);
+    
+    reader.waitForLiveNodes(30, TimeUnit.SECONDS, missingLiveNode(lostNodeName3));
+    
+    operations = getOperations(jetty3, lostNodeName3);
     assertNull(operations);
   }
 
-  private void waitForNodeLeave(String lostNodeName) throws InterruptedException {
-    ZkStateReader reader = cluster.getSolrClient().getZkStateReader();
-    TimeOut timeOut = new TimeOut(10, TimeUnit.SECONDS, TimeSource.NANO_TIME);
-    while (reader.getClusterState().getLiveNodes().contains(lostNodeName)) {
-      Thread.sleep(100);
-      if (timeOut.hasTimedOut()) fail("Wait for " + lostNodeName + " to leave failed!");
-    }
-  }
-
   @SuppressForbidden(reason = "Needs currentTimeMillis to create unique id")
   private List<SolrRequest> getOperations(JettySolrRunner actionJetty, String lostNodeName) throws Exception {
     try (AutoAddReplicasPlanAction action = new AutoAddReplicasPlanAction()) {

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/eb652b84/solr/core/src/test/org/apache/solr/cloud/autoscaling/ComputePlanActionTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/ComputePlanActionTest.java b/solr/core/src/test/org/apache/solr/cloud/autoscaling/ComputePlanActionTest.java
index 99eca6c..7227c8c 100644
--- a/solr/core/src/test/org/apache/solr/cloud/autoscaling/ComputePlanActionTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/autoscaling/ComputePlanActionTest.java
@@ -102,7 +102,8 @@ public class ComputePlanActionTest extends SolrCloudTestCase {
         for (int i1 = 0; i1 < jettySolrRunners.size(); i1++) {
           JettySolrRunner jettySolrRunner = jettySolrRunners.get(i1);
           if (jettySolrRunner == randomJetty) {
-            cluster.stopJettySolrRunner(i1);
+            JettySolrRunner j = cluster.stopJettySolrRunner(i1);
+            cluster.waitForJettyToStop(j);
             break;
           }
         }
@@ -168,8 +169,7 @@ public class ComputePlanActionTest extends SolrCloudTestCase {
   }
 
   @Test
-  //28-June-2018 @BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 21-May-2018
-  // commented 4-Sep-2018  @LuceneTestCase.BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 2-Aug-2018
+  @LuceneTestCase.AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") 
   public void testNodeLost() throws Exception  {
     // let's start a node so that we have at least two
     JettySolrRunner runner = cluster.startJettySolrRunner();
@@ -237,7 +237,8 @@ public class ComputePlanActionTest extends SolrCloudTestCase {
     for (int i = 0; i < cluster.getJettySolrRunners().size(); i++) {
       JettySolrRunner jettySolrRunner = cluster.getJettySolrRunners().get(i);
       if (jettySolrRunner == node2)  {
-        cluster.stopJettySolrRunner(i);
+        JettySolrRunner j = cluster.stopJettySolrRunner(i);
+        cluster.waitForJettyToStop(j);
         break;
       }
     }
@@ -275,12 +276,14 @@ public class ComputePlanActionTest extends SolrCloudTestCase {
     assertEquals(response.get("result").toString(), "success");
 
     CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection("testNodeWithMultipleReplicasLost",
-        "conf",2, 3);
+        "conf", 2, 3);
     create.setMaxShardsPerNode(2);
     create.process(solrClient);
+    
+    cluster.waitForActiveCollection("testNodeWithMultipleReplicasLost", 2, 6);
 
     waitForState("Timed out waiting for replicas of new collection to be active",
-        "testNodeWithMultipleReplicasLost", clusterShape(2, 3));
+        "testNodeWithMultipleReplicasLost", clusterShape(2, 6));
 
     ClusterState clusterState = cluster.getSolrClient().getZkStateReader().getClusterState();
     DocCollection docCollection = clusterState.getCollection("testNodeWithMultipleReplicasLost");
@@ -294,14 +297,14 @@ public class ComputePlanActionTest extends SolrCloudTestCase {
       if (replicas != null && replicas.size() == 2) {
         stoppedNodeName = jettySolrRunner.getNodeName();
         replicasToBeMoved = replicas;
-        cluster.stopJettySolrRunner(i);
+        JettySolrRunner j = cluster.stopJettySolrRunner(i);
+        cluster.waitForJettyToStop(j);
         break;
       }
     }
     assertNotNull(stoppedNodeName);
-    cluster.waitForAllNodes(30);
 
-    assertTrue("Trigger was not fired even after 5 seconds", triggerFiredLatch.await(5, TimeUnit.SECONDS));
+    assertTrue("Trigger was not fired even after 5 seconds", triggerFiredLatch.await(15, TimeUnit.SECONDS));
     assertTrue(fired.get());
 
     TriggerEvent triggerEvent = eventRef.get();
@@ -451,25 +454,29 @@ public class ComputePlanActionTest extends SolrCloudTestCase {
     assertEquals(response.get("result").toString(), "success");
 
     CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection("testSelected1",
-        "conf",2, 2);
+        "conf", 2, 2);
     create.process(solrClient);
 
     create = CollectionAdminRequest.createCollection("testSelected2",
-        "conf",2, 2);
+        "conf", 2, 2);
     create.process(solrClient);
 
     create = CollectionAdminRequest.createCollection("testSelected3",
-        "conf",2, 2);
+        "conf", 2, 2);
     create.process(solrClient);
-
+    
+    cluster.waitForActiveCollection("testSelected1", 2, 4);
+    cluster.waitForActiveCollection("testSelected2", 2, 4);
+    cluster.waitForActiveCollection("testSelected3", 2, 4);
+    
     waitForState("Timed out waiting for replicas of new collection to be active",
-        "testSelected1", clusterShape(2, 2));
+        "testSelected1", clusterShape(2, 4));
 
     waitForState("Timed out waiting for replicas of new collection to be active",
-        "testSelected2", clusterShape(2, 2));
+        "testSelected2", clusterShape(2, 4));
 
     waitForState("Timed out waiting for replicas of new collection to be active",
-        "testSelected3", clusterShape(2, 2));
+        "testSelected3", clusterShape(2, 4));
 
     // find a node that has replicas from all collections
     SolrCloudManager cloudManager = cluster.getJettySolrRunner(0).getCoreContainer().getZkController().getSolrCloudManager();
@@ -486,7 +493,8 @@ public class ComputePlanActionTest extends SolrCloudTestCase {
     String node = nodes.get(0);
     for (int i = 0; i < cluster.getJettySolrRunners().size(); i++) {
       if (cluster.getJettySolrRunner(i).getNodeName().equals(node)) {
-        cluster.stopJettySolrRunner(i);
+        JettySolrRunner j = cluster.stopJettySolrRunner(i);
+        cluster.waitForJettyToStop(j);
         break;
       }
     }
@@ -563,6 +571,7 @@ public class ComputePlanActionTest extends SolrCloudTestCase {
             collectionState.getReplicas().stream().allMatch(replica -> replica.isActive(liveNodes)));
 
     JettySolrRunner newNode = cluster.startJettySolrRunner();
+    cluster.waitForAllNodes(30);
     assertTrue(triggerFiredLatch.await(30, TimeUnit.SECONDS));
     assertTrue(fired.get());
     Map actionContext = actionContextPropsRef.get();
@@ -674,6 +683,6 @@ public class ComputePlanActionTest extends SolrCloudTestCase {
     }
 
     waitForState("Timed out waiting for all shards to have only 1 replica",
-        collectionNamePrefix + "_0", clusterShape(numShards, 1));
+        collectionNamePrefix + "_0", clusterShape(numShards, numShards));
   }
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/eb652b84/solr/core/src/test/org/apache/solr/cloud/autoscaling/ExecutePlanActionTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/ExecutePlanActionTest.java b/solr/core/src/test/org/apache/solr/cloud/autoscaling/ExecutePlanActionTest.java
index c15bc53..cbd0bac 100644
--- a/solr/core/src/test/org/apache/solr/cloud/autoscaling/ExecutePlanActionTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/autoscaling/ExecutePlanActionTest.java
@@ -45,6 +45,7 @@ import org.apache.solr.core.SolrResourceLoader;
 import org.apache.solr.util.LogLevel;
 import org.apache.solr.common.util.TimeSource;
 import org.apache.zookeeper.data.Stat;
+import org.junit.After;
 import org.junit.Before;
 import org.junit.BeforeClass;
 import org.junit.Test;
@@ -67,28 +68,29 @@ public class ExecutePlanActionTest extends SolrCloudTestCase {
 
   @BeforeClass
   public static void setupCluster() throws Exception {
-    configureCluster(NODE_COUNT)
-        .addConfig("conf", configset("cloud-minimal"))
-        .configure();
+
   }
 
   @Before
   public void setUp() throws Exception  {
     super.setUp();
+    
+    configureCluster(NODE_COUNT)
+    .addConfig("conf", configset("cloud-minimal"))
+    .configure();
+    
     // clear any persisted auto scaling configuration
     Stat stat = zkClient().setData(SOLR_AUTOSCALING_CONF_PATH, Utils.toJSON(new ZkNodeProps()), true);
 
-    if (cluster.getJettySolrRunners().size() < NODE_COUNT) {
-      // start some to get to original state
-      int numJetties = cluster.getJettySolrRunners().size();
-      for (int i = 0; i < NODE_COUNT - numJetties; i++) {
-        cluster.startJettySolrRunner();
-      }
-    }
-    cluster.waitForAllNodes(30);
-    loader = cluster.getJettySolrRunner(0).getCoreContainer().getResourceLoader();
+
     cloudManager = cluster.getJettySolrRunner(0).getCoreContainer().getZkController().getSolrCloudManager();
-    cluster.deleteAllCollections();
+  }
+  
+
+  @After
+  public void tearDown() throws Exception  {
+    shutdownCluster();
+    super.tearDown();
   }
 
   @Test
@@ -99,6 +101,8 @@ public class ExecutePlanActionTest extends SolrCloudTestCase {
         "conf", 1, 2);
     create.setMaxShardsPerNode(1);
     create.process(solrClient);
+    
+    cluster.waitForActiveCollection(collectionName, 1, 2);
 
     waitForState("Timed out waiting for replicas of new collection to be active",
         collectionName, clusterShape(1, 2));
@@ -189,6 +193,8 @@ public class ExecutePlanActionTest extends SolrCloudTestCase {
         "conf", 1, 2);
     create.setMaxShardsPerNode(1);
     create.process(solrClient);
+    
+    cluster.waitForActiveCollection(collectionName, 1, 2);
 
     waitForState("Timed out waiting for replicas of new collection to be active",
         collectionName, clusterShape(1, 2));
@@ -209,11 +215,13 @@ public class ExecutePlanActionTest extends SolrCloudTestCase {
     for (int i = 0; i < cluster.getJettySolrRunners().size(); i++) {
       JettySolrRunner runner = cluster.getJettySolrRunner(i);
       if (runner == sourceNode) {
-        cluster.stopJettySolrRunner(i);
+        JettySolrRunner j = cluster.stopJettySolrRunner(i);
+        cluster.waitForJettyToStop(j);
       }
     }
+    
+    Thread.sleep(1000);
 
-    cluster.waitForAllNodes(30);
     waitForState("Timed out waiting for replicas of collection to be 2 again",
         collectionName, clusterShape(1, 2));
 
@@ -221,6 +229,6 @@ public class ExecutePlanActionTest extends SolrCloudTestCase {
     docCollection = clusterState.getCollection(collectionName);
     List<Replica> replicasOnSurvivor = docCollection.getReplicas(survivor.getNodeName());
     assertNotNull(replicasOnSurvivor);
-    assertEquals(2, replicasOnSurvivor.size());
+    assertEquals(docCollection.toString(), 2, replicasOnSurvivor.size());
   }
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/eb652b84/solr/core/src/test/org/apache/solr/cloud/autoscaling/HdfsAutoAddReplicasIntegrationTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/HdfsAutoAddReplicasIntegrationTest.java b/solr/core/src/test/org/apache/solr/cloud/autoscaling/HdfsAutoAddReplicasIntegrationTest.java
index cedf713..72d3c32 100644
--- a/solr/core/src/test/org/apache/solr/cloud/autoscaling/HdfsAutoAddReplicasIntegrationTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/autoscaling/HdfsAutoAddReplicasIntegrationTest.java
@@ -17,6 +17,7 @@
 
 package org.apache.solr.cloud.autoscaling;
 
+import com.carrotsearch.randomizedtesting.annotations.Nightly;
 import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.apache.lucene.util.LuceneTestCase;
@@ -33,6 +34,7 @@ import org.junit.BeforeClass;
     MoveReplicaHDFSTest.ForkJoinThreadsFilter.class
 })
 //commented 23-AUG-2018 @LuceneTestCase.BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // added 20-Jul-2018
+@Nightly
 public class HdfsAutoAddReplicasIntegrationTest extends AutoAddReplicasIntegrationTest {
 
   private static MiniDFSCluster dfsCluster;

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/eb652b84/solr/core/src/test/org/apache/solr/cloud/autoscaling/HttpTriggerListenerTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/HttpTriggerListenerTest.java b/solr/core/src/test/org/apache/solr/cloud/autoscaling/HttpTriggerListenerTest.java
index eeb1a87..26c13b0 100644
--- a/solr/core/src/test/org/apache/solr/cloud/autoscaling/HttpTriggerListenerTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/autoscaling/HttpTriggerListenerTest.java
@@ -114,6 +114,7 @@ public class HttpTriggerListenerTest extends SolrCloudTestCase {
     assertEquals(requests.toString(), 0, requests.size());
 
     cluster.startJettySolrRunner();
+    cluster.waitForAllNodes(30);
     boolean await = triggerFiredLatch.await(20, TimeUnit.SECONDS);
     assertTrue("The trigger did not fire at all", await);