You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ho...@apache.org on 2019/01/15 01:53:10 UTC

[lucene-solr] branch master updated: SOLR-12923: add a latch to TestTriggerListener to harden tests that use it so they can deterministically know when all events have been processed

This is an automated email from the ASF dual-hosted git repository.

hossman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git


The following commit(s) were added to refs/heads/master by this push:
     new d970375  SOLR-12923: add a latch to TestTriggerListener to harden tests that use it so they can deterministically know when all events have been processed
d970375 is described below

commit d970375cd2f2357b88886a8da5ac67ef994f8d43
Author: Chris Hostetter <ho...@apache.org>
AuthorDate: Mon Jan 14 18:16:40 2019 -0700

    SOLR-12923: add a latch to TestTriggerListener to harden tests that use it so they can deterministically know when all events have been processed
    
    This hardens several flaky tests and allows the removal of several arbitrary sleep calls
---
 .../autoscaling/sim/TestSimTriggerIntegration.java | 84 ++++++++++++----------
 1 file changed, 48 insertions(+), 36 deletions(-)

diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimTriggerIntegration.java b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimTriggerIntegration.java
index 3056d33..107c690 100644
--- a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimTriggerIntegration.java
+++ b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimTriggerIntegration.java
@@ -142,10 +142,13 @@ public class TestSimTriggerIntegration extends SimSolrCloudTestCase {
     triggerFinishedCount = new AtomicInteger();
     events.clear();
     listenerEvents.clear();
+    allListenerEvents.clear();
+    failDummyAction = false;
+    listenerCreated = new CountDownLatch(1);
+    listenerEventLatch = new CountDownLatch(0);
   }
 
   @Test
-  //@BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-13072")
   public void testTriggerThrottling() throws Exception  {
     // for this test we want to create two triggers so we must assert that the actions were created twice
     actionInitCalled = new CountDownLatch(2);
@@ -441,7 +444,6 @@ public class TestSimTriggerIntegration extends SimSolrCloudTestCase {
   }
 
   @Test
-  //@BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-13072")
   public void testNodeAddedTrigger() throws Exception {
     SolrClient solrClient = cluster.simGetSolrClient();
     assertAutoScalingRequest
@@ -495,7 +497,6 @@ public class TestSimTriggerIntegration extends SimSolrCloudTestCase {
   }
 
   @Test
-  //@AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/SOLR-13072")
   public void testNodeLostTrigger() throws Exception {
     SolrClient solrClient = cluster.simGetSolrClient();
     assertAutoScalingRequest
@@ -727,7 +728,6 @@ public class TestSimTriggerIntegration extends SimSolrCloudTestCase {
   }
 
   @Test
- // @BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-13072")
   public void testEventFromRestoredState() throws Exception {
     SolrClient solrClient = cluster.simGetSolrClient();
     assertAutoScalingRequest
@@ -835,7 +835,6 @@ public class TestSimTriggerIntegration extends SimSolrCloudTestCase {
   }
 
   @Test
-  //@BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-13072")
   public void testNodeMarkersRegistration() throws Exception {
     // for this test we want to create two triggers so we must assert that the actions were created twice
     actionInitCalled = new CountDownLatch(2);
@@ -972,6 +971,7 @@ public class TestSimTriggerIntegration extends SimSolrCloudTestCase {
   static final Map<String, List<CapturedEvent>> listenerEvents = new ConcurrentHashMap<>();
   static final List<CapturedEvent> allListenerEvents = Collections.synchronizedList(new ArrayList<>());
   static volatile CountDownLatch listenerCreated = new CountDownLatch(1);
+  static volatile CountDownLatch listenerEventLatch = new CountDownLatch(0);
   static volatile boolean failDummyAction = false;
 
   public static class TestTriggerListener extends TriggerListenerBase {
@@ -984,10 +984,18 @@ public class TestSimTriggerIntegration extends SimSolrCloudTestCase {
     @Override
     public synchronized void onEvent(TriggerEvent event, TriggerEventProcessorStage stage, String actionName,
                                      ActionContext context, Throwable error, String message) {
-      List<CapturedEvent> lst = listenerEvents.computeIfAbsent(config.name, s -> new ArrayList<>());
       CapturedEvent ev = new CapturedEvent(cluster.getTimeSource().getTimeNs(), context, config, stage, actionName, event, message);
-      lst.add(ev);
-      allListenerEvents.add(ev);
+      final CountDownLatch latch = listenerEventLatch;
+      synchronized (latch) {
+        if (0 == latch.getCount()) {
+          log.warn("Ignoring captured event since latch is 'full': {}", ev);
+        } else {
+          List<CapturedEvent> lst = listenerEvents.computeIfAbsent(config.name, s -> new ArrayList<>());
+          lst.add(ev);
+          allListenerEvents.add(ev);
+          latch.countDown();
+        }
+      }
     }
   }
 
@@ -1004,6 +1012,8 @@ public class TestSimTriggerIntegration extends SimSolrCloudTestCase {
 
   @Test
   public void testListeners() throws Exception {
+    listenerEventLatch = new CountDownLatch(4 + 5);
+    
     SolrClient solrClient = cluster.simGetSolrClient();
     assertAutoScalingRequest
       ("{" +
@@ -1018,10 +1028,6 @@ public class TestSimTriggerIntegration extends SimSolrCloudTestCase {
        "]" +
        "}}");
 
-    if (!actionInitCalled.await(3000 / SPEED, TimeUnit.MILLISECONDS))  {
-      fail("The TriggerAction should have been created by now");
-    }
-
     assertAutoScalingRequest
       ("{" +
        "'set-listener' : " +
@@ -1049,6 +1055,8 @@ public class TestSimTriggerIntegration extends SimSolrCloudTestCase {
        "}");
 
     assertAutoscalingUpdateComplete();
+    assertTrue("The TriggerAction should have been init'ed w/in a reasonable amount of time",
+               actionInitCalled.await(10, TimeUnit.SECONDS));
 
     listenerEvents.clear();
     failDummyAction = false;
@@ -1058,9 +1066,9 @@ public class TestSimTriggerIntegration extends SimSolrCloudTestCase {
     assertTrue("The trigger did not fire at all", await);
     assertTrue(triggerFired.get());
 
-    assertEquals("both listeners should have fired", 2, listenerEvents.size());
-
-    cluster.getTimeSource().sleep(3000);
+    assertTrue("the listeners shou;d have recorded all events w/in a reasonable amount of time",
+               listenerEventLatch.await(10, TimeUnit.SECONDS));
+    assertEquals("at least 2 event types should have been recorded", 2, listenerEvents.size());
 
     // check foo events
     List<CapturedEvent> testEvents = listenerEvents.get("foo");
@@ -1118,12 +1126,15 @@ public class TestSimTriggerIntegration extends SimSolrCloudTestCase {
     triggerFiredLatch = new CountDownLatch(1);
     listenerEvents.clear();
     failDummyAction = true;
+    listenerEventLatch = new CountDownLatch(4 + 4); // fewer total due to failDummyAction
 
     newNode = cluster.simAddNode();
     await = triggerFiredLatch.await(20000 / SPEED, TimeUnit.MILLISECONDS);
     assertTrue("The trigger did not fire at all", await);
 
-    cluster.getTimeSource().sleep(3000);
+    assertTrue("the listeners shoud have recorded all events w/in a reasonable amount of time",
+               listenerEventLatch.await(10, TimeUnit.SECONDS));
+    assertEquals("at least 2 event types should have been recorded", 2, listenerEvents.size());
 
     // check foo events
     testEvents = listenerEvents.get("foo");
@@ -1160,10 +1171,10 @@ public class TestSimTriggerIntegration extends SimSolrCloudTestCase {
   }
 
   @Test
-  //@BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-13072")
   public void testCooldown() throws Exception {
     SolrClient solrClient = cluster.simGetSolrClient();
     failDummyAction = false;
+    listenerEventLatch = new CountDownLatch(1);
     waitForSeconds = 1;
     assertAutoScalingRequest
       ("{" +
@@ -1189,6 +1200,8 @@ public class TestSimTriggerIntegration extends SimSolrCloudTestCase {
        "}");
 
     assertAutoscalingUpdateComplete();
+    assertTrue("The TriggerAction should have been init'ed w/in a reasonable amount of time",
+               actionInitCalled.await(10, TimeUnit.SECONDS));
 
     listenerCreated = new CountDownLatch(1);
     listenerEvents.clear();
@@ -1197,16 +1210,17 @@ public class TestSimTriggerIntegration extends SimSolrCloudTestCase {
     boolean await = triggerFiredLatch.await(45000 / SPEED, TimeUnit.MILLISECONDS);
     assertTrue("The trigger did not fire at all", await);
     assertTrue(triggerFired.get());
-    // wait for listener to capture the SUCCEEDED stage
-    cluster.getTimeSource().sleep(5000);
+    assertTrue("the listener should have recorded all events w/in a reasonable amount of time",
+               listenerEventLatch.await(10, TimeUnit.SECONDS));
 
     List<CapturedEvent> capturedEvents = listenerEvents.get("bar");
     assertNotNull("no events for 'bar'!", capturedEvents);
-    // we may get a few IGNORED events if other tests caused events within cooldown period
-    assertTrue(capturedEvents.toString(), capturedEvents.size() > 0);
-    long prevTimestamp = capturedEvents.get(capturedEvents.size() - 1).timestamp;
+
+    assertEquals(capturedEvents.toString(), 1, capturedEvents.size());
+    long prevTimestamp = capturedEvents.get(0).timestamp;
 
     // reset the trigger and captured events
+    listenerEventLatch = new CountDownLatch(1);
     listenerEvents.clear();
     triggerFiredLatch = new CountDownLatch(1);
     triggerFired.compareAndSet(true, false);
@@ -1214,14 +1228,14 @@ public class TestSimTriggerIntegration extends SimSolrCloudTestCase {
     String newNode2 = cluster.simAddNode();
     await = triggerFiredLatch.await(20000 / SPEED, TimeUnit.MILLISECONDS);
     assertTrue("The trigger did not fire at all", await);
-    // wait for listener to capture the SUCCEEDED stage
-    cluster.getTimeSource().sleep(8000);
+    assertTrue("the listener should have recorded all events w/in a reasonable amount of time",
+               listenerEventLatch.await(10, TimeUnit.SECONDS));
 
     // there must be exactly one SUCCEEDED event
     capturedEvents = listenerEvents.get("bar");
     assertNotNull(capturedEvents);
-    assertTrue(capturedEvents.toString(), capturedEvents.size() >= 1);
-    CapturedEvent ev = capturedEvents.get(capturedEvents.size() - 1);
+    assertEquals(capturedEvents.toString(), 1, capturedEvents.size());
+    CapturedEvent ev = capturedEvents.get(0);
     assertEquals(ev.toString(), TriggerEventProcessorStage.SUCCEEDED, ev.stage);
     // the difference between timestamps of the first SUCCEEDED and the last SUCCEEDED
     // must be larger than cooldown period
@@ -1267,7 +1281,6 @@ public class TestSimTriggerIntegration extends SimSolrCloudTestCase {
 
 
   @Test
-  //@AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/SOLR-13072") // this test is way to sensitive to timing, must be beasted before returned
   public void testSearchRate() throws Exception {
     SolrClient solrClient = cluster.simGetSolrClient();
     String COLL1 = "collection1";
@@ -1276,6 +1289,8 @@ public class TestSimTriggerIntegration extends SimSolrCloudTestCase {
     create.process(solrClient);
     CloudTestUtils.waitForState(cluster, COLL1, 10, TimeUnit.SECONDS, CloudTestUtils.clusterShape(1, 2, false, true));
 
+    listenerEventLatch = new CountDownLatch(4);
+    
     assertAutoScalingRequest
       ("{" +
        "'set-trigger' : {" +
@@ -1308,10 +1323,6 @@ public class TestSimTriggerIntegration extends SimSolrCloudTestCase {
 
     assertAutoscalingUpdateComplete();
 
-//    SolrParams query = params(CommonParams.Q, "*:*");
-//    for (int i = 0; i < 500; i++) {
-//      solrClient.query(COLL1, query);
-//    }
 
     cluster.getSimClusterStateProvider().simSetCollectionValue(COLL1, "QUERY./select.requestTimes:1minRate", 500, false, true);
 
@@ -1319,12 +1330,13 @@ public class TestSimTriggerIntegration extends SimSolrCloudTestCase {
     assertTrue("The trigger did not start in time", await);
     await = triggerFinishedLatch.await(60000 / SPEED, TimeUnit.MILLISECONDS);
     assertTrue("The trigger did not finish in time", await);
-    // wait for listener to capture the SUCCEEDED stage
-    cluster.getTimeSource().sleep(5000);
-    
-    List<CapturedEvent> events = listenerEvents.get("srt");
+
+    assertTrue("the listener should have recorded all events w/in a reasonable amount of time",
+               listenerEventLatch.await(10, TimeUnit.SECONDS));
+
+    List<CapturedEvent> events = new ArrayList<>(listenerEvents.get("srt"));
     assertNotNull("Could not find events for srt", events);
-    assertEquals(listenerEvents.toString(), 4, events.size());
+    assertEquals(events.toString(), 4, events.size());
     assertEquals("AFTER_ACTION", events.get(0).stage.toString());
     assertEquals("compute", events.get(0).actionName);
     assertEquals("AFTER_ACTION", events.get(1).stage.toString());