You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ho...@apache.org on 2019/01/22 19:15:11 UTC

[lucene-solr] branch branch_7x updated: SOLR-13140: harden SearchRateTriggerIntegrationTest by using more absolute rate thresholds and latches to track when all events have been processed so we don't need to 'guess' about sleep calls

This is an automated email from the ASF dual-hosted git repository.

hossman pushed a commit to branch branch_7x
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git


The following commit(s) were added to refs/heads/branch_7x by this push:
     new 6882f43  SOLR-13140: harden SearchRateTriggerIntegrationTest by using more absolute rate thresholds and latches to track when all events have been processed so we don't need to 'guess' about sleep calls
6882f43 is described below

commit 6882f43b96c6dc0fec7a1677d6687fa83f5a1669
Author: Chris Hostetter <ho...@apache.org>
AuthorDate: Tue Jan 22 11:47:11 2019 -0700

    SOLR-13140: harden SearchRateTriggerIntegrationTest by using more absolute rate thresholds and latches to track when all events have been processed so we don't need to 'guess' about sleep calls
    
    This commit also disables testDeleteNode pending an AwaitsFix on SOLR-13163
    
    (cherry picked from commit 15e5ca999ff7e912653db897781b21642d5307f0)
---
 .../SearchRateTriggerIntegrationTest.java          | 678 ++++++++++-----------
 1 file changed, 336 insertions(+), 342 deletions(-)

diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/SearchRateTriggerIntegrationTest.java b/solr/core/src/test/org/apache/solr/cloud/autoscaling/SearchRateTriggerIntegrationTest.java
index 08b45ca..a53a389 100644
--- a/solr/core/src/test/org/apache/solr/cloud/autoscaling/SearchRateTriggerIntegrationTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/autoscaling/SearchRateTriggerIntegrationTest.java
@@ -29,7 +29,6 @@ import java.util.concurrent.atomic.AtomicInteger;
 import com.carrotsearch.randomizedtesting.annotations.Nightly;
 import com.google.common.util.concurrent.AtomicDouble;
 import org.apache.lucene.util.LuceneTestCase;
-import org.apache.solr.client.solrj.SolrRequest;
 import org.apache.solr.client.solrj.cloud.SolrCloudManager;
 import org.apache.solr.client.solrj.cloud.autoscaling.AutoScalingConfig;
 import org.apache.solr.client.solrj.cloud.autoscaling.ReplicaInfo;
@@ -37,7 +36,6 @@ import org.apache.solr.client.solrj.cloud.autoscaling.TriggerEventProcessorStage
 import org.apache.solr.client.solrj.impl.CloudSolrClient;
 import org.apache.solr.client.solrj.request.CollectionAdminRequest;
 import org.apache.solr.cloud.CloudTestUtils;
-import org.apache.solr.cloud.CloudTestUtils.AutoScalingRequest;
 import org.apache.solr.cloud.SolrCloudTestCase;
 import org.apache.solr.common.MapWriter;
 import org.apache.solr.common.cloud.Replica;
@@ -71,10 +69,11 @@ public class SearchRateTriggerIntegrationTest extends SolrCloudTestCase {
   private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
 
   private static final TimeSource timeSource = TimeSource.NANO_TIME;
-  private static CountDownLatch listenerCreated = new CountDownLatch(1);
-  private static Map<String, List<CapturedEvent>> listenerEvents = new HashMap<>();
-  private static CountDownLatch finished = new CountDownLatch(1);
-  private static CountDownLatch started = new CountDownLatch(1);
+  private static volatile CountDownLatch listenerCreated = new CountDownLatch(1);
+  private static volatile CountDownLatch listenerEventLatch = new CountDownLatch(0);
+  private static volatile Map<String, List<CapturedEvent>> listenerEvents = new HashMap<>();
+  private static volatile CountDownLatch finished = new CountDownLatch(1);
+  private static volatile CountDownLatch started = new CountDownLatch(1);
   private static SolrCloudManager cloudManager;
 
   private int waitForSeconds;
@@ -99,15 +98,17 @@ public class SearchRateTriggerIntegrationTest extends SolrCloudTestCase {
     // clear any persisted auto scaling configuration
     Stat stat = zkClient().setData(SOLR_AUTOSCALING_CONF_PATH, Utils.toJSON(new ZkNodeProps()), true);
     log.info(SOLR_AUTOSCALING_CONF_PATH + " reset, new znode version {}", stat.getVersion());
-    timeSource.sleep(5000);
     deleteChildrenRecursively(ZkStateReader.SOLR_AUTOSCALING_EVENTS_PATH);
     deleteChildrenRecursively(ZkStateReader.SOLR_AUTOSCALING_TRIGGER_STATE_PATH);
     deleteChildrenRecursively(ZkStateReader.SOLR_AUTOSCALING_NODE_LOST_PATH);
     deleteChildrenRecursively(ZkStateReader.SOLR_AUTOSCALING_NODE_ADDED_PATH);
-
+    
     finished = new CountDownLatch(1);
     started = new CountDownLatch(1);
+    listenerCreated = new CountDownLatch(1);
     listenerEvents = new HashMap<>();
+    listenerEventLatch = new CountDownLatch(0);
+    
     waitForSeconds = 3 + random().nextInt(5);
   }
 
@@ -126,100 +127,94 @@ public class SearchRateTriggerIntegrationTest extends SolrCloudTestCase {
     CloudTestUtils.waitForState(cloudManager, COLL1, 60, TimeUnit.SECONDS,
         CloudTestUtils.clusterShape(1, 2));
 
-    // the trigger is initially disabled so that we have the time to set up listeners
-    // and generate the traffic
-    String setTriggerCommand = "{" +
-        "'set-trigger' : {" +
-        "'name' : 'search_rate_trigger1'," +
-        "'event' : 'searchRate'," +
-        "'waitFor' : '" + waitForSeconds + "s'," +
-        "'enabled' : false," +
-        "'collections' : '" + COLL1 + "'," +
-        "'aboveRate' : 1.0," +
-        "'belowRate' : 0.1," +
-        "'actions' : [" +
-        "{'name':'compute','class':'" + ComputePlanAction.class.getName() + "'}," +
-        "{'name':'execute','class':'" + ExecutePlanAction.class.getName() + "'}" +
-        "]" +
-        "}}";
-    SolrRequest req = AutoScalingRequest.create(SolrRequest.METHOD.POST, setTriggerCommand);
-    NamedList<Object> response = solrClient.request(req);
-    assertEquals(response.get("result").toString(), "success");
-
-    String setListenerCommand = "{" +
-        "'set-listener' : " +
-        "{" +
-        "'name' : 'started'," +
-        "'trigger' : 'search_rate_trigger1'," +
-        "'stage' : ['STARTED']," +
-        "'class' : '" + StartedProcessingListener.class.getName() + "'" +
-        "}" +
-        "}";
-    req = AutoScalingRequest.create(SolrRequest.METHOD.POST, setListenerCommand);
-    response = solrClient.request(req);
-    assertEquals(response.get("result").toString(), "success");
-
-    setListenerCommand = "{" +
-        "'set-listener' : " +
-        "{" +
-        "'name' : 'srt'," +
-        "'trigger' : 'search_rate_trigger1'," +
-        "'stage' : ['FAILED','SUCCEEDED']," +
-        "'afterAction': ['compute', 'execute']," +
-        "'class' : '" + CapturingTriggerListener.class.getName() + "'" +
-        "}" +
-        "}";
-    req = AutoScalingRequest.create(SolrRequest.METHOD.POST, setListenerCommand);
-    response = solrClient.request(req);
-    assertEquals(response.get("result").toString(), "success");
-
-    setListenerCommand = "{" +
-        "'set-listener' : " +
-        "{" +
-        "'name' : 'finished'," +
-        "'trigger' : 'search_rate_trigger1'," +
-        "'stage' : ['SUCCEEDED']," +
-        "'class' : '" + FinishedProcessingListener.class.getName() + "'" +
-        "}" +
-        "}";
-    req = AutoScalingRequest.create(SolrRequest.METHOD.POST, setListenerCommand);
-    response = solrClient.request(req);
-    assertEquals(response.get("result").toString(), "success");
+    CloudTestUtils.assertAutoScalingRequest
+      (cloudManager, 
+       // the trigger is initially disabled so that we have the time to set up listeners
+       // and generate the traffic
+       "{" +
+       "'set-trigger' : {" +
+       "'name' : 'search_rate_trigger1'," +
+       "'event' : 'searchRate'," +
+       "'waitFor' : '" + waitForSeconds + "s'," +
+       "'enabled' : false," +
+       "'collections' : '" + COLL1 + "'," +
+       "'aboveRate' : 1.0," +
+       "'belowRate' : 0.1," +
+       "'actions' : [" +
+       "{'name':'compute','class':'" + ComputePlanAction.class.getName() + "'}," +
+       "{'name':'execute','class':'" + ExecutePlanAction.class.getName() + "'}" +
+       "]" +
+       "}}");
+
+    CloudTestUtils.assertAutoScalingRequest
+      (cloudManager, 
+       "{" +
+       "'set-listener' : " +
+       "{" +
+       "'name' : 'started'," +
+       "'trigger' : 'search_rate_trigger1'," +
+       "'stage' : ['STARTED']," +
+       "'class' : '" + StartedProcessingListener.class.getName() + "'" +
+       "}" +
+       "}");
+
+    CloudTestUtils.assertAutoScalingRequest
+      (cloudManager, 
+       "{" +
+       "'set-listener' : " +
+       "{" +
+       "'name' : 'srt'," +
+       "'trigger' : 'search_rate_trigger1'," +
+       "'stage' : ['FAILED','SUCCEEDED']," +
+       "'afterAction': ['compute', 'execute']," +
+       "'class' : '" + CapturingTriggerListener.class.getName() + "'" +
+       "}" +
+       "}");
+    listenerEventLatch = new CountDownLatch(3);
+
+    CloudTestUtils.assertAutoScalingRequest
+      (cloudManager, 
+       "{" +
+       "'set-listener' : " +
+       "{" +
+       "'name' : 'finished'," +
+       "'trigger' : 'search_rate_trigger1'," +
+       "'stage' : ['SUCCEEDED']," +
+       "'class' : '" + FinishedProcessingListener.class.getName() + "'" +
+       "}" +
+       "}");
 
     SolrParams query = params(CommonParams.Q, "*:*");
     for (int i = 0; i < 500; i++) {
       solrClient.query(COLL1, query);
     }
-
+    
     // enable the trigger
-    String resumeTriggerCommand = "{" +
-        "'resume-trigger' : {" +
-        "'name' : 'search_rate_trigger1'" +
-        "}" +
-        "}";
-    req = AutoScalingRequest.create(SolrRequest.METHOD.POST, resumeTriggerCommand);
-    response = solrClient.request(req);
-    assertEquals(response.get("result").toString(), "success");
-
-    timeSource.sleep(TimeUnit.MILLISECONDS.convert(waitForSeconds + 1, TimeUnit.SECONDS));
-
-    boolean await = started.await(20, TimeUnit.SECONDS);
-    assertTrue("The trigger did not fire at all", await);
-
-    await = finished.await(60, TimeUnit.SECONDS);
-    assertTrue("The trigger did not finish processing", await);
+    CloudTestUtils.assertAutoScalingRequest
+      (cloudManager, 
+       "{" +
+       "'resume-trigger' : {" +
+       "'name' : 'search_rate_trigger1'" +
+       "}" +
+       "}");
+
+    assertTrue("The trigger did not start in a reasonable amount of time",
+               started.await(60, TimeUnit.SECONDS));
+    
+    assertTrue("The trigger did not finish in a reasonable amount of time",
+               started.await(60, TimeUnit.SECONDS));
+    
+    assertTrue("the listener should have recorded all events w/in a reasonable amount of time",
+               listenerEventLatch.await(60, TimeUnit.SECONDS));
 
     // suspend the trigger
-    String suspendTriggerCommand = "{" +
-        "'suspend-trigger' : {" +
-        "'name' : 'search_rate_trigger1'" +
-        "}" +
-        "}";
-    req = AutoScalingRequest.create(SolrRequest.METHOD.POST, suspendTriggerCommand);
-    response = solrClient.request(req);
-    assertEquals(response.get("result").toString(), "success");
-
-    timeSource.sleep(5000);
+    CloudTestUtils.assertAutoScalingRequest
+      (cloudManager, 
+       "{" +
+       "'suspend-trigger' : {" +
+       "'name' : 'search_rate_trigger1'" +
+       "}" +
+       "}");
 
     List<CapturedEvent> events = listenerEvents.get("srt");
     assertEquals(listenerEvents.toString(), 3, events.size());
@@ -274,8 +269,6 @@ public class SearchRateTriggerIntegrationTest extends SolrCloudTestCase {
   }
 
   @Test
-  //17-Aug-2018 commented  @BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 21-May-2018
-  // commented out on: 24-Dec-2018   @BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // added 15-Sep-2018
   public void testBelowSearchRate() throws Exception {
     CloudSolrClient solrClient = cluster.getSolrClient();
     String COLL1 = "belowRate_collection";
@@ -295,100 +288,87 @@ public class SearchRateTriggerIntegrationTest extends SolrCloudTestCase {
     CloudTestUtils.waitForState(cloudManager, COLL1, 60, TimeUnit.SECONDS,
         CloudTestUtils.clusterShape(1, 5));
 
-    String setTriggerCommand = "{" +
-        "'set-trigger' : {" +
-        "'name' : 'search_rate_trigger2'," +
-        "'event' : 'searchRate'," +
-        "'waitFor' : '" + waitForSeconds + "s'," +
-        "'enabled' : false," +
-        "'collections' : '" + COLL1 + "'," +
-        "'aboveRate' : 1.0," +
-        // RecoveryStrategy calls /admin/ping, which calls /select so this may not be zero
-        // even when no external requests were made
-        "'belowRate' : 0.3," +
-        "'aboveNodeRate' : 1.0," +
-        "'belowNodeRate' : 0.3," +
-        // do nothing but generate an op
-        "'belowNodeOp' : 'none'," +
-        "'actions' : [" +
-        "{'name':'compute','class':'" + ComputePlanAction.class.getName() + "'}," +
-        "{'name':'execute','class':'" + ExecutePlanAction.class.getName() + "'}" +
-        "]" +
-        "}}";
-    SolrRequest req = AutoScalingRequest.create(SolrRequest.METHOD.POST, setTriggerCommand);
-    NamedList<Object> response = solrClient.request(req);
-    assertEquals(response.get("result").toString(), "success");
-
-    String setListenerCommand = "{" +
-        "'set-listener' : " +
-        "{" +
-        "'name' : 'started'," +
-        "'trigger' : 'search_rate_trigger2'," +
-        "'stage' : ['STARTED']," +
-        "'class' : '" + StartedProcessingListener.class.getName() + "'" +
-        "}" +
-        "}";
-    req = AutoScalingRequest.create(SolrRequest.METHOD.POST, setListenerCommand);
-    response = solrClient.request(req);
-    assertEquals(response.get("result").toString(), "success");
-
-    setListenerCommand = "{" +
-        "'set-listener' : " +
-        "{" +
-        "'name' : 'srt'," +
-        "'trigger' : 'search_rate_trigger2'," +
-        "'stage' : ['FAILED','SUCCEEDED']," +
-        "'afterAction': ['compute', 'execute']," +
-        "'class' : '" + CapturingTriggerListener.class.getName() + "'" +
-        "}" +
-        "}";
-    req = AutoScalingRequest.create(SolrRequest.METHOD.POST, setListenerCommand);
-    response = solrClient.request(req);
-    assertEquals(response.get("result").toString(), "success");
-
-    setListenerCommand = "{" +
-        "'set-listener' : " +
-        "{" +
-        "'name' : 'finished'," +
-        "'trigger' : 'search_rate_trigger2'," +
-        "'stage' : ['SUCCEEDED']," +
-        "'class' : '" + FinishedProcessingListener.class.getName() + "'" +
-        "}" +
-        "}";
-    req = AutoScalingRequest.create(SolrRequest.METHOD.POST, setListenerCommand);
-    response = solrClient.request(req);
-    assertEquals(response.get("result").toString(), "success");
-
-    timeSource.sleep(TimeUnit.MILLISECONDS.convert(waitForSeconds + 1, TimeUnit.SECONDS));
+    CloudTestUtils.assertAutoScalingRequest
+      (cloudManager, 
+       "{" +
+       "'set-trigger' : {" +
+       "'name' : 'search_rate_trigger2'," +
+       "'event' : 'searchRate'," +
+       "'waitFor' : '" + waitForSeconds + "s'," +
+       "'enabled' : false," +
+       "'collections' : '" + COLL1 + "'," +
+       "'aboveRate' : 1.0," +
+       "'aboveNodeRate' : 1.0," +
+       // RecoveryStrategy calls /admin/ping, which calls /select so the rate may not be zero
+       // even when no external requests were made .. but it's hard to predict exactly
+       // what it will be.  use an insanely high rate so all shards/nodes are suspect
+       // and produce an Op regardless of how much internal traffic is produced...
+       "'belowRate' : 1.0," +
+       "'belowNodeRate' : 1.0," +
+       // ...but do absolutely nothing to nodes except generate an 'NONE' Op
+       "'belowNodeOp' : 'none'," +
+       "'actions' : [" +
+       "{'name':'compute','class':'" + ComputePlanAction.class.getName() + "'}," +
+       "{'name':'execute','class':'" + ExecutePlanAction.class.getName() + "'}" +
+       "]" +
+       "}}");
+
+    CloudTestUtils.assertAutoScalingRequest
+      (cloudManager, 
+       "{" +
+       "'set-listener' : " +
+       "{" +
+       "'name' : 'started'," +
+       "'trigger' : 'search_rate_trigger2'," +
+       "'stage' : ['STARTED']," +
+       "'class' : '" + StartedProcessingListener.class.getName() + "'" +
+       "}" +
+       "}");
+
+    CloudTestUtils.assertAutoScalingRequest
+      (cloudManager, 
+       "{" +
+       "'set-listener' : " +
+       "{" +
+       "'name' : 'srt'," +
+       "'trigger' : 'search_rate_trigger2'," +
+       "'stage' : ['FAILED','SUCCEEDED']," +
+       "'afterAction': ['compute', 'execute']," +
+       "'class' : '" + CapturingTriggerListener.class.getName() + "'" +
+       "}" +
+       "}");
+    listenerEventLatch = new CountDownLatch(3);
+    
+    CloudTestUtils.assertAutoScalingRequest
+      (cloudManager, 
+       "{" +
+       "'set-listener' : " +
+       "{" +
+       "'name' : 'finished'," +
+       "'trigger' : 'search_rate_trigger2'," +
+       "'stage' : ['SUCCEEDED']," +
+       "'class' : '" + FinishedProcessingListener.class.getName() + "'" +
+       "}" +
+       "}");
+
+    // Explicitly Do Nothing Here
 
     // enable the trigger
-    String resumeTriggerCommand = "{" +
-        "'resume-trigger' : {" +
-        "'name' : 'search_rate_trigger2'" +
-        "}" +
-        "}";
-    req = AutoScalingRequest.create(SolrRequest.METHOD.POST, resumeTriggerCommand);
-    response = solrClient.request(req);
-    assertEquals(response.get("result").toString(), "success");
-
-    timeSource.sleep(TimeUnit.MILLISECONDS.convert(waitForSeconds + 1, TimeUnit.SECONDS));
-
-    boolean await = started.await(20, TimeUnit.SECONDS);
-    assertTrue("The trigger did not fire at all", await);
-    await = finished.await(60, TimeUnit.SECONDS);
-    assertTrue("The trigger did not finish processing", await);
+    final String resumeTriggerCommand = "{ 'resume-trigger' : { 'name' : 'search_rate_trigger2' } }";
+    CloudTestUtils.assertAutoScalingRequest(cloudManager, resumeTriggerCommand);
 
-    // suspend the trigger
-    String suspendTriggerCommand = "{" +
-        "'suspend-trigger' : {" +
-        "'name' : 'search_rate_trigger2'" +
-        "}" +
-        "}";
-    req = AutoScalingRequest.create(SolrRequest.METHOD.POST, suspendTriggerCommand);
-    response = solrClient.request(req);
-    assertEquals(response.get("result").toString(), "success");
+    assertTrue("The trigger did not start in a reasonable amount of time",
+               started.await(60, TimeUnit.SECONDS));
+    
+    assertTrue("The trigger did not finish in a reasonable amount of time",
+               started.await(60, TimeUnit.SECONDS));
+    
+    assertTrue("the listener should have recorded all events w/in a reasonable amount of time",
+               listenerEventLatch.await(60, TimeUnit.SECONDS));
 
-    timeSource.sleep(TimeUnit.MILLISECONDS.convert(waitForSeconds + 1, TimeUnit.SECONDS));
+    // suspend the trigger
+    final String suspendTriggerCommand = "{ 'suspend-trigger' : { 'name' : 'search_rate_trigger2' } }";
+    CloudTestUtils.assertAutoScalingRequest(cloudManager, suspendTriggerCommand);
 
     List<CapturedEvent> events = listenerEvents.get("srt");
     assertEquals(events.toString(), 3, events.size());
@@ -396,8 +376,8 @@ public class SearchRateTriggerIntegrationTest extends SolrCloudTestCase {
     assertEquals(ev.toString(), "compute", ev.actionName);
     List<TriggerEvent.Op> ops = (List<TriggerEvent.Op>)ev.event.getProperty(TriggerEvent.REQUESTED_OPS);
     assertNotNull("there should be some requestedOps: " + ev.toString(), ops);
-    // 4 cold nodes, 3 cold replicas
-    assertEquals(ops.toString(), 7, ops.size());
+    // 5 cold nodes, 3 cold replicas
+    assertEquals(ops.toString(), 5 + 3, ops.size());
     AtomicInteger coldNodes = new AtomicInteger();
     AtomicInteger coldReplicas = new AtomicInteger();
     ops.forEach(op -> {
@@ -409,7 +389,7 @@ public class SearchRateTriggerIntegrationTest extends SolrCloudTestCase {
         fail("unexpected op: " + op);
       }
     });
-    assertEquals("cold nodes", 4, coldNodes.get());
+    assertEquals("cold nodes", 5, coldNodes.get());
     assertEquals("cold replicas", 3, coldReplicas.get());
 
     // now the collection should be down to RF = 2
@@ -417,28 +397,26 @@ public class SearchRateTriggerIntegrationTest extends SolrCloudTestCase {
         CloudTestUtils.clusterShape(1, 2));
 
     listenerEvents.clear();
+    listenerEventLatch = new CountDownLatch(3);
     finished = new CountDownLatch(1);
     started = new CountDownLatch(1);
 
     // resume trigger
-    req = AutoScalingRequest.create(SolrRequest.METHOD.POST, resumeTriggerCommand);
-    response = solrClient.request(req);
-    assertEquals(response.get("result").toString(), "success");
-
-    // there should be only coldNode ops now, and no coldReplica ops since searchable RF == collection RF
-    timeSource.sleep(TimeUnit.MILLISECONDS.convert(waitForSeconds + 1, TimeUnit.SECONDS));
+    CloudTestUtils.assertAutoScalingRequest(cloudManager, resumeTriggerCommand);
 
-    await = started.await(20, TimeUnit.SECONDS);
-    assertTrue("The trigger did not fire at all", await);
-    await = finished.await(60, TimeUnit.SECONDS);
-    assertTrue("The trigger did not finish processing", await);
+    assertTrue("The trigger did not start in a reasonable amount of time",
+               started.await(60, TimeUnit.SECONDS));
+    
+    assertTrue("The trigger did not finish in a reasonable amount of time",
+               started.await(60, TimeUnit.SECONDS));
+    
+    assertTrue("the listener should have recorded all events w/in a reasonable amount of time",
+               listenerEventLatch.await(60, TimeUnit.SECONDS));
 
-    // suspend trigger
-    req = AutoScalingRequest.create(SolrRequest.METHOD.POST, suspendTriggerCommand);
-    response = solrClient.request(req);
-    assertEquals(response.get("result").toString(), "success");
+    // suspend the trigger
+    CloudTestUtils.assertAutoScalingRequest(cloudManager, suspendTriggerCommand);
 
-    timeSource.sleep(5000);
+    // there should be only coldNode ops now, and no coldReplica ops since searchable RF == collection RF
 
     events = listenerEvents.get("srt");
     assertEquals(events.toString(), 3, events.size());
@@ -451,51 +429,47 @@ public class SearchRateTriggerIntegrationTest extends SolrCloudTestCase {
     assertEquals(ops.toString(), CollectionParams.CollectionAction.NONE, ops.get(0).getAction());
     assertEquals(ops.toString(), CollectionParams.CollectionAction.NONE, ops.get(1).getAction());
 
-    // wait for waitFor to elapse for all types of violations
-    timeSource.sleep(TimeUnit.MILLISECONDS.convert(waitForSeconds * 2, TimeUnit.SECONDS));
 
     listenerEvents.clear();
+    listenerEventLatch = new CountDownLatch(3);
     finished = new CountDownLatch(1);
     started = new CountDownLatch(1);
 
     log.info("## test single replicas.");
 
     // now allow single replicas
-    setTriggerCommand = "{" +
-        "'set-trigger' : {" +
-        "'name' : 'search_rate_trigger2'," +
-        "'event' : 'searchRate'," +
-        "'waitFor' : '" + waitForSeconds + "s'," +
-        "'enabled' : true," +
-        "'collections' : '" + COLL1 + "'," +
-        "'aboveRate' : 1.0," +
-        "'belowRate' : 0.3," +
-        "'aboveNodeRate' : 1.0," +
-        "'belowNodeRate' : 0.3," +
-        "'minReplicas' : 1," +
-        "'belowNodeOp' : 'none'," +
-        "'actions' : [" +
-        "{'name':'compute','class':'" + ComputePlanAction.class.getName() + "'}," +
-        "{'name':'execute','class':'" + ExecutePlanAction.class.getName() + "'}" +
-        "]" +
-        "}}";
-    req = AutoScalingRequest.create(SolrRequest.METHOD.POST, setTriggerCommand);
-    response = solrClient.request(req);
-    assertEquals(response.get("result").toString(), "success");
-
-    timeSource.sleep(TimeUnit.MILLISECONDS.convert(waitForSeconds + 1, TimeUnit.SECONDS));
-
-    await = started.await(20, TimeUnit.SECONDS);
-    assertTrue("The trigger did not fire at all", await);
-    await = finished.await(60, TimeUnit.SECONDS);
-    assertTrue("The trigger did not finish processing", await);
-
-    // suspend trigger
-    req = AutoScalingRequest.create(SolrRequest.METHOD.POST, suspendTriggerCommand);
-    response = solrClient.request(req);
-    assertEquals(response.get("result").toString(), "success");
-
-    timeSource.sleep(5000);
+    CloudTestUtils.assertAutoScalingRequest
+      (cloudManager,
+       "{" +
+       "'set-trigger' : {" +
+       "'name' : 'search_rate_trigger2'," +
+       "'event' : 'searchRate'," +
+       "'waitFor' : '" + waitForSeconds + "s'," +
+       "'enabled' : true," +
+       "'collections' : '" + COLL1 + "'," +
+       "'aboveRate' : 1.0," +
+       "'aboveNodeRate' : 1.0," +
+       "'belowRate' : 1.0," + // same excessively high values
+       "'belowNodeRate' : 1.0," +
+       "'minReplicas' : 1," + // NEW: force lower replicas
+       "'belowNodeOp' : 'none'," + // still do nothing to nodes
+       "'actions' : [" +
+       "{'name':'compute','class':'" + ComputePlanAction.class.getName() + "'}," +
+       "{'name':'execute','class':'" + ExecutePlanAction.class.getName() + "'}" +
+       "]" +
+       "}}");
+
+    assertTrue("The trigger did not start in a reasonable amount of time",
+               started.await(60, TimeUnit.SECONDS));
+    
+    assertTrue("The trigger did not finish in a reasonable amount of time",
+               started.await(60, TimeUnit.SECONDS));
+    
+    assertTrue("the listener should have recorded all events w/in a reasonable amount of time",
+               listenerEventLatch.await(60, TimeUnit.SECONDS));
+
+    // suspend the trigger
+    CloudTestUtils.assertAutoScalingRequest(cloudManager, suspendTriggerCommand);
 
     events = listenerEvents.get("srt");
     assertEquals(events.toString(), 3, events.size());
@@ -525,7 +499,7 @@ public class SearchRateTriggerIntegrationTest extends SolrCloudTestCase {
   }
 
   @Test
-  @BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 21-May-2018
+  @AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/SOLR-13163") 
   public void testDeleteNode() throws Exception {
     CloudSolrClient solrClient = cluster.getSolrClient();
     String COLL1 = "deleteNode_collection";
@@ -546,103 +520,101 @@ public class SearchRateTriggerIntegrationTest extends SolrCloudTestCase {
     CloudTestUtils.waitForState(cloudManager, COLL1, 60, TimeUnit.SECONDS,
         CloudTestUtils.clusterShape(1, 5));
 
-    String setTriggerCommand = "{" +
-        "'set-trigger' : {" +
-        "'name' : 'search_rate_trigger3'," +
-        "'event' : 'searchRate'," +
-        "'waitFor' : '" + waitForSeconds + "s'," +
-        "'enabled' : false," +
-        "'collections' : '" + COLL1 + "'," +
-        "'aboveRate' : 1.0," +
-        "'belowRate' : 0.1," +
-        // set limits to node rates
-        "'aboveNodeRate' : 1.0," +
-        "'belowNodeRate' : 0.1," +
-        // allow deleting all spare replicas
-        "'minReplicas' : 1," +
-        // allow requesting all deletions in one event
-        "'maxOps' : 10," +
-        // delete underutilised nodes
-        "'belowNodeOp' : 'DELETENODE'," +
-        "'actions' : [" +
-        "{'name':'compute','class':'" + ComputePlanAction.class.getName() + "'}," +
-        "{'name':'execute','class':'" + ExecutePlanAction.class.getName() + "'}" +
-        "]" +
-        "}}";
-    SolrRequest req = AutoScalingRequest.create(SolrRequest.METHOD.POST, setTriggerCommand);
-    NamedList<Object> response = solrClient.request(req);
-    assertEquals(response.get("result").toString(), "success");
-
-    String setListenerCommand = "{" +
-        "'set-listener' : " +
-        "{" +
-        "'name' : 'started'," +
-        "'trigger' : 'search_rate_trigger3'," +
-        "'stage' : ['STARTED']," +
-        "'class' : '" + StartedProcessingListener.class.getName() + "'" +
-        "}" +
-        "}";
-    req = AutoScalingRequest.create(SolrRequest.METHOD.POST, setListenerCommand);
-    response = solrClient.request(req);
-    assertEquals(response.get("result").toString(), "success");
-
-    setListenerCommand = "{" +
-        "'set-listener' : " +
-        "{" +
-        "'name' : 'srt'," +
-        "'trigger' : 'search_rate_trigger3'," +
-        "'stage' : ['FAILED','SUCCEEDED']," +
-        "'afterAction': ['compute', 'execute']," +
-        "'class' : '" + CapturingTriggerListener.class.getName() + "'" +
-        "}" +
-        "}";
-    req = AutoScalingRequest.create(SolrRequest.METHOD.POST, setListenerCommand);
-    response = solrClient.request(req);
-    assertEquals(response.get("result").toString(), "success");
-
-    setListenerCommand = "{" +
-        "'set-listener' : " +
-        "{" +
-        "'name' : 'finished'," +
-        "'trigger' : 'search_rate_trigger3'," +
-        "'stage' : ['SUCCEEDED']," +
-        "'class' : '" + FinishedProcessingListener.class.getName() + "'" +
-        "}" +
-        "}";
-    req = AutoScalingRequest.create(SolrRequest.METHOD.POST, setListenerCommand);
-    response = solrClient.request(req);
-    assertEquals(response.get("result").toString(), "success");
-
-    timeSource.sleep(TimeUnit.MILLISECONDS.convert(waitForSeconds + 1, TimeUnit.SECONDS));
-
+    CloudTestUtils.assertAutoScalingRequest
+      (cloudManager, 
+       "{" +
+       "'set-trigger' : {" +
+       "'name' : 'search_rate_trigger3'," +
+       "'event' : 'searchRate'," +
+       "'waitFor' : '" + waitForSeconds + "s'," +
+       "'enabled' : false," +
+       "'collections' : '" + COLL1 + "'," +
+       "'aboveRate' : 1.0," +
+       "'aboveNodeRate' : 1.0," +
+       // RecoveryStrategy calls /admin/ping, which calls /select so the rate may not be zero
+       // even when no external requests were made .. but it's hard to predict exactly
+       // what it will be.  use an insanely high rate so all shards/nodes are suspect
+       // and produce an Op regardless of how much internal traffic is produced...
+       "'belowRate' : 1.0," +
+       "'belowNodeRate' : 1.0," +
+       // ...our Ops should be to delete underutilised nodes...
+       "'belowNodeOp' : 'DELETENODE'," +
+       // ...allow deleting all spare replicas...
+       "'minReplicas' : 1," +
+       // ...and allow requesting all deletions in one event.
+       "'maxOps' : 10," +
+       "'actions' : [" +
+       "{'name':'compute','class':'" + ComputePlanAction.class.getName() + "'}," +
+       "{'name':'execute','class':'" + ExecutePlanAction.class.getName() + "'}" +
+       "]" +
+       "}}");
+
+    CloudTestUtils.assertAutoScalingRequest
+      (cloudManager, 
+       "{" +
+       "'set-listener' : " +
+       "{" +
+       "'name' : 'started'," +
+       "'trigger' : 'search_rate_trigger3'," +
+       "'stage' : ['STARTED']," +
+       "'class' : '" + StartedProcessingListener.class.getName() + "'" +
+       "}" +
+       "}");
+
+    CloudTestUtils.assertAutoScalingRequest
+      (cloudManager, 
+       "{" +
+       "'set-listener' : " +
+       "{" +
+       "'name' : 'srt'," +
+       "'trigger' : 'search_rate_trigger3'," +
+       "'stage' : ['FAILED','SUCCEEDED']," +
+       "'afterAction': ['compute', 'execute']," +
+       "'class' : '" + CapturingTriggerListener.class.getName() + "'" +
+       "}" +
+       "}");
+    listenerEventLatch = new CountDownLatch(3);
+
+    CloudTestUtils.assertAutoScalingRequest
+      (cloudManager, 
+       "{" +
+       "'set-listener' : " +
+       "{" +
+       "'name' : 'finished'," +
+       "'trigger' : 'search_rate_trigger3'," +
+       "'stage' : ['SUCCEEDED']," +
+       "'class' : '" + FinishedProcessingListener.class.getName() + "'" +
+       "}" +
+       "}");
+    
+    // Explicitly Do Nothing Here
+    
     // enable the trigger
-    String resumeTriggerCommand = "{" +
-        "'resume-trigger' : {" +
-        "'name' : 'search_rate_trigger3'" +
-        "}" +
-        "}";
-    req = AutoScalingRequest.create(SolrRequest.METHOD.POST, resumeTriggerCommand);
-    response = solrClient.request(req);
-    assertEquals(response.get("result").toString(), "success");
-
-    timeSource.sleep(TimeUnit.MILLISECONDS.convert(waitForSeconds + 1, TimeUnit.SECONDS));
-
-    boolean await = started.await(20, TimeUnit.SECONDS);
-    assertTrue("The trigger did not fire at all", await);
-    await = finished.await(90, TimeUnit.SECONDS);
-    assertTrue("The trigger did not finish processing", await);
-
+    CloudTestUtils.assertAutoScalingRequest
+      (cloudManager,
+       "{" +
+       "'resume-trigger' : {" +
+       "'name' : 'search_rate_trigger3'" +
+       "}" +
+       "}");
+
+    assertTrue("The trigger did not start in a reasonable amount of time",
+               started.await(60, TimeUnit.SECONDS));
+    
+    assertTrue("The trigger did not finish in a reasonable amount of time",
+               started.await(60, TimeUnit.SECONDS));
+    
+    assertTrue("the listener should have recorded all events w/in a reasonable amount of time",
+               listenerEventLatch.await(60, TimeUnit.SECONDS));
+    
     // suspend the trigger
-    String suspendTriggerCommand = "{" +
-        "'suspend-trigger' : {" +
-        "'name' : 'search_rate_trigger3'" +
-        "}" +
-        "}";
-    req = AutoScalingRequest.create(SolrRequest.METHOD.POST, suspendTriggerCommand);
-    response = solrClient.request(req);
-    assertEquals(response.get("result").toString(), "success");
-
-    timeSource.sleep(5000);
+    CloudTestUtils.assertAutoScalingRequest
+      (cloudManager,
+       "{" +
+       "'suspend-trigger' : {" +
+       "'name' : 'search_rate_trigger3'" +
+       "}" +
+       "}");
 
     List<CapturedEvent> events = listenerEvents.get("srt");
     assertEquals(events.toString(), 3, events.size());
@@ -651,8 +623,22 @@ public class SearchRateTriggerIntegrationTest extends SolrCloudTestCase {
     assertEquals(ev.toString(), "compute", ev.actionName);
     List<TriggerEvent.Op> ops = (List<TriggerEvent.Op>)ev.event.getProperty(TriggerEvent.REQUESTED_OPS);
     assertNotNull("there should be some requestedOps: " + ev.toString(), ops);
-    // 4 DELETEREPLICA, 4 DELETENODE
-    assertEquals(ops.toString(), 8, ops.size());
+    // 4 DELETEREPLICA, 4 DELETENODE (minReplicas==1 & leader should be protected)
+    assertEquals(ops.toString(), 4 + 4, ops.size());
+    // The above assert can fail with actual==9 because all 5 nodes are resulting in a DELETENODE
+    // Which is problemtatic for 2 reasons:
+    //  1) it means that the leader node has not been protected from the 'belowNodeOp':'DELETENODE'
+    //     - definitely a bug that needs fixed
+    //  2) it suggests that minReplicas isn't being respected by 'belowNodeOp':'DELETENODE'
+    //     - something that needs more rigerous testing
+    //     - ie: if belowRate==0 && belowNodeRate==1 && minReplicas==2, will leader + 1 be protected?
+    //
+    // In general, to adequately trust testing of 'belowNodeOp':'DELETENODE' we should also test:
+    //  - some nodes with multiple replicas of the shard to ensure best nodes are picked
+    //  - node nodes hosting replicas of multiple shards/collection, only some of which are belowNodeRate
+
+
+
     AtomicInteger replicas = new AtomicInteger();
     AtomicInteger nodes = new AtomicInteger();
     ops.forEach(op -> {
@@ -715,10 +701,18 @@ public class SearchRateTriggerIntegrationTest extends SolrCloudTestCase {
     @Override
     public synchronized void onEvent(TriggerEvent event, TriggerEventProcessorStage stage, String actionName,
                                      ActionContext context, Throwable error, String message) {
-      List<CapturedEvent> lst = listenerEvents.computeIfAbsent(config.name, s -> new ArrayList<>());
       CapturedEvent ev = new CapturedEvent(timeSource.getTimeNs(), context, config, stage, actionName, event, message);
-      log.info("=======> " + ev);
-      lst.add(ev);
+      final CountDownLatch latch = listenerEventLatch;
+      synchronized (latch) {
+        if (0 == latch.getCount()) {
+          log.warn("Ignoring captured event since latch is 'full': {}", ev);
+        } else {
+          List<CapturedEvent> lst = listenerEvents.computeIfAbsent(config.name, s -> new ArrayList<>());
+          log.info("=======> " + ev);
+          lst.add(ev);
+          latch.countDown();
+        }
+      }
     }
   }