You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ho...@apache.org on 2019/03/01 18:44:28 UTC

[lucene-solr] branch master updated: SOLR-12923: increase all await() times in TestSimTriggerIntegration

This is an automated email from the ASF dual-hosted git repository.

hossman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git


The following commit(s) were added to refs/heads/master by this push:
     new fed8059  SOLR-12923: increase all await() times in TestSimTriggerIntegration
fed8059 is described below

commit fed80599a6f3c237ac61782a42196f6355b8cda6
Author: Chris Hostetter <ho...@apache.org>
AuthorDate: Fri Mar 1 11:38:18 2019 -0700

    SOLR-12923: increase all await() times in TestSimTriggerIntegration
    
    This means that 'real' failures (which should be rare and hopefully reproducible) will be 'slow', but the trade-off will be fewer hard-to-reproduce 'false failures' due to thread contention on slow or heavily loaded (i.e., Jenkins) machines
---
 .../autoscaling/sim/TestSimTriggerIntegration.java | 173 ++++++++++-----------
 1 file changed, 84 insertions(+), 89 deletions(-)

diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimTriggerIntegration.java b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimTriggerIntegration.java
index 8b3ecd0..871e083 100644
--- a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimTriggerIntegration.java
+++ b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestSimTriggerIntegration.java
@@ -181,15 +181,14 @@ public class TestSimTriggerIntegration extends SimSolrCloudTestCase {
     assertAutoscalingUpdateComplete();
 
     // wait until the two instances of action are created
-    if (!actionInitCalled.await(10000 / SPEED, TimeUnit.MILLISECONDS))  {
-      fail("Two TriggerAction instances should have been created by now");
-    }
+    assertTrue("Two TriggerAction instances were not created "+
+               "even after await()ing an excessive amount of time",
+               actionInitCalled.await(60, TimeUnit.SECONDS));
 
     String newNode = cluster.simAddNode();
 
-    if (!triggerFiredLatch.await(420000 / SPEED, TimeUnit.MILLISECONDS)) {
-      fail("Both triggers should have fired by now");
-    }
+    assertTrue("Both triggers did not fire event after await()ing an excessive amount of time",
+               triggerFiredLatch.await(60, TimeUnit.SECONDS));
 
     // reset shared state
     lastActionExecutedAt.set(0);
@@ -219,9 +218,9 @@ public class TestSimTriggerIntegration extends SimSolrCloudTestCase {
     assertAutoscalingUpdateComplete();
 
     // wait until the two instances of action are created
-    if (!actionInitCalled.await(3000 / SPEED, TimeUnit.MILLISECONDS))  {
-      fail("Two TriggerAction instances should have been created by now");
-    }
+    assertTrue("Two TriggerAction instances were not created "+
+               "even after await()ing an excessive amount of time",
+               actionInitCalled.await(60, TimeUnit.SECONDS));
 
     // stop the node we had started earlier
     cluster.simRemoveNode(newNode, false);
@@ -497,13 +496,14 @@ public class TestSimTriggerIntegration extends SimSolrCloudTestCase {
 
     assertAutoscalingUpdateComplete();
 
-    if (!actionInitCalled.await(5000 / SPEED, TimeUnit.MILLISECONDS))  {
-      fail("The TriggerAction should have been created by now");
-    }
+    // wait until the two instances of action are created
+    assertTrue("TriggerAction was not created even after await()ing an excessive amount of time",
+               actionInitCalled.await(60, TimeUnit.SECONDS));
 
     String newNode = cluster.simAddNode();
-    boolean await = triggerFiredLatch.await(240000 / SPEED, TimeUnit.MILLISECONDS);
-    assertTrue("The trigger did not fire at all", await);
+    
+    assertTrue("trigger did not fire event after await()ing an excessive amount of time",
+               triggerFiredLatch.await(60, TimeUnit.SECONDS));
     assertTrue(triggerFired.get());
     TriggerEvent nodeAddedEvent = events.iterator().next();
     assertNotNull(nodeAddedEvent);
@@ -528,11 +528,11 @@ public class TestSimTriggerIntegration extends SimSolrCloudTestCase {
     assertAutoscalingUpdateComplete();
 
     // this should be a no-op so the action should have been created but init should not be called
-    if (!actionConstructorCalled.await(3000 / SPEED, TimeUnit.MILLISECONDS))  {
-      fail("The TriggerAction should have been created by now");
-    }
-
-    assertFalse(actionInitCalled.await(2000 / SPEED, TimeUnit.MILLISECONDS));
+    assertTrue("TriggerAction was not created even after await()ing an excessive amount of time",
+               actionConstructorCalled.await(60, TimeUnit.SECONDS));
+    // HACK: we are waiting a *short* amount of time and asserting that the init action was *not* called
+    assertFalse("init should not have been called on TriggerAction since update was No-Op",
+                actionInitCalled.await(3, TimeUnit.SECONDS));
   }
 
   @Test
@@ -550,14 +550,13 @@ public class TestSimTriggerIntegration extends SimSolrCloudTestCase {
 
     assertAutoscalingUpdateComplete();
 
-    if (!actionInitCalled.await(5000 / SPEED, TimeUnit.MILLISECONDS))  {
-      fail("The TriggerAction should have been created by now");
-    }
+    assertTrue("Trigger was not init()ed even after await()ing an excessive amount of time",
+               actionInitCalled.await(60, TimeUnit.SECONDS));
 
     String lostNodeName = cluster.getSimClusterStateProvider().simGetRandomNode();
     cluster.simRemoveNode(lostNodeName, false);
-    boolean await = triggerFiredLatch.await(45000 / SPEED, TimeUnit.MILLISECONDS);
-    assertTrue("The trigger did not fire at all", await);
+    assertTrue("trigger did not fire event after await()ing an excessive amount of time",
+               triggerFiredLatch.await(60, TimeUnit.SECONDS));
     assertTrue(triggerFired.get());
     TriggerEvent nodeLostEvent = events.iterator().next();
     assertNotNull(nodeLostEvent);
@@ -582,11 +581,11 @@ public class TestSimTriggerIntegration extends SimSolrCloudTestCase {
     assertAutoscalingUpdateComplete();
 
     // this should be a no-op so the action should have been created but init should not be called
-    if (!actionConstructorCalled.await(3000 / SPEED, TimeUnit.MILLISECONDS))  {
-      fail("The TriggerAction should have been created by now");
-    }
-
-    assertFalse(actionInitCalled.await(2000 / SPEED, TimeUnit.MILLISECONDS));
+    assertTrue("TriggerAction was not created even after await()ing an excessive amount of time",
+               actionConstructorCalled.await(60, TimeUnit.SECONDS));
+    // HACK: we are waiting a *short* amount of time and asserting that the init action was *not* called
+    assertFalse("init should not have been called on TriggerAction since update was No-Op",
+                actionInitCalled.await(3, TimeUnit.SECONDS));
   }
 
   // simulator doesn't support overseer functionality yet
@@ -727,17 +726,16 @@ public class TestSimTriggerIntegration extends SimSolrCloudTestCase {
 
     assertAutoscalingUpdateComplete();
 
-    if (!actionInitCalled.await(3000 / SPEED, TimeUnit.MILLISECONDS))  {
-      fail("The TriggerAction should have been created by now");
-    }
+    assertTrue("Trigger was not init()ed even after await()ing an excessive amount of time",
+               actionInitCalled.await(60, TimeUnit.SECONDS));
 
     // wait for the trigger to run at least once
     cluster.getTimeSource().sleep(2 * waitForSeconds * 1000);
 
     // add node to generate the event
     String newNode = cluster.simAddNode();
-    boolean await = actionStarted.await(60000 / SPEED, TimeUnit.MILLISECONDS);
-    assertTrue("action did not start", await);
+    assertTrue("Action did not start even after await()ing an excessive amount of time",
+               actionStarted.await(60, TimeUnit.SECONDS));
     // event should be there
     TriggerEvent nodeAddedEvent = events.iterator().next();
     assertNotNull(nodeAddedEvent);
@@ -753,16 +751,16 @@ public class TestSimTriggerIntegration extends SimSolrCloudTestCase {
     assertAutoscalingUpdateComplete();
 
     // new overseer leader should be elected and run triggers
-    await = actionInterrupted.await(3000 / SPEED, TimeUnit.MILLISECONDS);
-    assertTrue("action wasn't interrupted", await);
+    assertTrue("Action was not interupted even after await()ing an excessive amount of time",
+               actionInterrupted.await(60, TimeUnit.SECONDS));
     // it should fire again from enqueued event
-    await = actionStarted.await(60000 / SPEED, TimeUnit.MILLISECONDS);
-    assertTrue("action wasn't started", await);
+    assertTrue("Action did not (re-)start even after await()ing an excessive amount of time",
+               actionStarted.await(60, TimeUnit.SECONDS));
     TriggerEvent replayedEvent = events.iterator().next();
     assertTrue(replayedEvent.getProperty(TriggerEventQueue.ENQUEUE_TIME) != null);
     assertTrue(events + "\n" + replayedEvent.toString(), replayedEvent.getProperty(TriggerEventQueue.DEQUEUE_TIME) != null);
-    await = actionCompleted.await(10000 / SPEED, TimeUnit.MILLISECONDS);
-    assertTrue("action wasn't completed", await);
+    assertTrue("Action did not complete even after await()ing an excessive amount of time",
+               actionCompleted.await(60, TimeUnit.SECONDS));
     assertTrue(triggerFired.get());
   }
 
@@ -781,15 +779,14 @@ public class TestSimTriggerIntegration extends SimSolrCloudTestCase {
 
     assertAutoscalingUpdateComplete();
 
-    if (!actionInitCalled.await(10000 / SPEED, TimeUnit.MILLISECONDS))  {
-      fail("The TriggerAction should have been created by now");
-    }
+    assertTrue("Trigger was not init()ed even after await()ing an excessive amount of time",
+               actionInitCalled.await(60, TimeUnit.SECONDS));
 
     events.clear();
 
     String newNode = cluster.simAddNode();
-    boolean await = triggerFiredLatch.await(90000 / SPEED, TimeUnit.MILLISECONDS);
-    assertTrue("The trigger did not fire at all", await);
+    assertTrue("trigger did not fire event after await()ing an excessive amount of time",
+               triggerFiredLatch.await(60, TimeUnit.SECONDS));
     assertTrue(triggerFired.get());
     // reset
     triggerFired.set(false);
@@ -803,8 +800,8 @@ public class TestSimTriggerIntegration extends SimSolrCloudTestCase {
     cluster.getTimeSource().sleep(10000);
     // kill overseer
     cluster.simRestartOverseer(null);
-    await = triggerFiredLatch.await(60000 / SPEED, TimeUnit.MILLISECONDS);
-    assertTrue("The trigger did not fire at all", await);
+    assertTrue("trigger did not fire event after await()ing an excessive amount of time",
+               triggerFiredLatch.await(60, TimeUnit.SECONDS));
     assertTrue(triggerFired.get());
   }
 
@@ -888,9 +885,8 @@ public class TestSimTriggerIntegration extends SimSolrCloudTestCase {
 
     // add a node
     String node = cluster.simAddNode();
-    if (!listener.onChangeLatch.await(10000 / SPEED, TimeUnit.MILLISECONDS)) {
-      fail("onChange listener didn't execute on cluster change");
-    }
+    assertTrue("cluster onChange listener didn't execute even after await()ing an excessive amount of time",
+               listener.onChangeLatch.await(60, TimeUnit.SECONDS));
     assertEquals(1, listener.addedNodes.size());
     assertEquals(node, listener.addedNodes.iterator().next());
     // verify that a znode doesn't exist (no trigger)
@@ -903,9 +899,9 @@ public class TestSimTriggerIntegration extends SimSolrCloudTestCase {
     cluster.simRestartOverseer(overseerLeader);
     assertAutoscalingUpdateComplete();
 
-    if (!listener.onChangeLatch.await(10000, TimeUnit.MILLISECONDS)) {
-      fail("onChange listener didn't execute on cluster change");
-    }
+    assertTrue("cluster onChange listener didn't execute even after await()ing an excessive amount of time",
+               listener.onChangeLatch.await(60, TimeUnit.SECONDS));
+
     assertEquals(1, listener.lostNodes.size());
     assertEquals(overseerLeader, listener.lostNodes.iterator().next());
     assertEquals(0, listener.addedNodes.size());
@@ -959,9 +955,8 @@ public class TestSimTriggerIntegration extends SimSolrCloudTestCase {
     // create another node
     log.info("====== ADD NODE 1");
     String node1 = cluster.simAddNode();
-    if (!listener.onChangeLatch.await(10000 / SPEED, TimeUnit.MILLISECONDS)) {
-      fail("onChange listener didn't execute on cluster change");
-    }
+    assertTrue("cluster onChange listener didn't execute even after await()ing an excessive amount of time",
+               listener.onChangeLatch.await(60, TimeUnit.SECONDS));
     assertEquals(1, listener.addedNodes.size());
     assertEquals(node1, listener.addedNodes.iterator().next());
     // verify that a znode exists
@@ -975,15 +970,13 @@ public class TestSimTriggerIntegration extends SimSolrCloudTestCase {
     // kill overseer again
     log.info("====== KILL OVERSEER 2");
     cluster.simRestartOverseer(overseerLeader);
-    if (!listener.onChangeLatch.await(10000 / SPEED, TimeUnit.MILLISECONDS)) {
-      fail("onChange listener didn't execute on cluster change");
-    }
+    assertTrue("cluster onChange listener didn't execute even after await()ing an excessive amount of time",
+               listener.onChangeLatch.await(60, TimeUnit.SECONDS));
 
     assertAutoscalingUpdateComplete();
 
-    if (!triggerFiredLatch.await(120000 / SPEED, TimeUnit.MILLISECONDS)) {
-      fail("Trigger should have fired by now");
-    }
+    assertTrue("trigger did not fire event after await()ing an excessive amount of time",
+               triggerFiredLatch.await(60, TimeUnit.SECONDS));
     assertEquals(2, events.size());
     TriggerEvent nodeAdded = null;
     TriggerEvent nodeLost = null;
@@ -1094,19 +1087,19 @@ public class TestSimTriggerIntegration extends SimSolrCloudTestCase {
        "}");
 
     assertAutoscalingUpdateComplete();
-    assertTrue("The TriggerAction should have been init'ed w/in a reasonable amount of time",
-               actionInitCalled.await(10, TimeUnit.SECONDS));
+    assertTrue("The TriggerAction was not init'ed even after await()ing an excessive amount of time",
+               actionInitCalled.await(60, TimeUnit.SECONDS));
 
     listenerEvents.clear();
     failDummyAction = false;
 
     String newNode = cluster.simAddNode();
-    boolean await = triggerFiredLatch.await(45000 / SPEED, TimeUnit.MILLISECONDS);
-    assertTrue("The trigger did not fire at all", await);
+    assertTrue("trigger did not fire event after await()ing an excessive amount of time",
+               triggerFiredLatch.await(60, TimeUnit.SECONDS));
     assertTrue(triggerFired.get());
 
-    assertTrue("the listeners shou;d have recorded all events w/in a reasonable amount of time",
-               listenerEventLatch.await(10, TimeUnit.SECONDS));
+    assertTrue("the listeners didn't recorded all events even after await()ing an excessive amount of time",
+               listenerEventLatch.await(60, TimeUnit.SECONDS));
     assertEquals("at least 2 event types should have been recorded", 2, listenerEvents.size());
 
     // check foo events
@@ -1168,11 +1161,12 @@ public class TestSimTriggerIntegration extends SimSolrCloudTestCase {
     listenerEventLatch = new CountDownLatch(4 + 4); // fewer total due to failDummyAction
 
     newNode = cluster.simAddNode();
-    await = triggerFiredLatch.await(20000 / SPEED, TimeUnit.MILLISECONDS);
-    assertTrue("The trigger did not fire at all", await);
+    assertTrue("trigger did not fire event after await()ing an excessive amount of time",
+               triggerFiredLatch.await(60, TimeUnit.SECONDS));
+    assertTrue(triggerFired.get());
 
-    assertTrue("the listeners shoud have recorded all events w/in a reasonable amount of time",
-               listenerEventLatch.await(10, TimeUnit.SECONDS));
+    assertTrue("the listeners didn't recorded all events even after await()ing an excessive amount of time",
+               listenerEventLatch.await(60, TimeUnit.SECONDS));
     assertEquals("at least 2 event types should have been recorded", 2, listenerEvents.size());
 
     // check foo events
@@ -1239,18 +1233,18 @@ public class TestSimTriggerIntegration extends SimSolrCloudTestCase {
        "}");
 
     assertAutoscalingUpdateComplete();
-    assertTrue("The TriggerAction should have been init'ed w/in a reasonable amount of time",
-               actionInitCalled.await(10, TimeUnit.SECONDS));
+    assertTrue("The TriggerAction was not init'ed even after await()ing an excessive amount of time",
+               actionInitCalled.await(60, TimeUnit.SECONDS));
 
     listenerCreated = new CountDownLatch(1);
     listenerEvents.clear();
 
     String newNode = cluster.simAddNode();
-    boolean await = triggerFiredLatch.await(45000 / SPEED, TimeUnit.MILLISECONDS);
-    assertTrue("The trigger did not fire at all", await);
+    assertTrue("trigger did not fire event after await()ing an excessive amount of time",
+               triggerFiredLatch.await(60, TimeUnit.SECONDS));
     assertTrue(triggerFired.get());
-    assertTrue("the listener should have recorded all events w/in a reasonable amount of time",
-               listenerEventLatch.await(10, TimeUnit.SECONDS));
+    assertTrue("the listener didn't recorded all events even after await()ing an excessive amount of time",
+               listenerEventLatch.await(60, TimeUnit.SECONDS));
 
     List<CapturedEvent> capturedEvents = listenerEvents.get("bar");
     assertNotNull("no events for 'bar'!", capturedEvents);
@@ -1265,10 +1259,10 @@ public class TestSimTriggerIntegration extends SimSolrCloudTestCase {
     triggerFired.compareAndSet(true, false);
 
     String newNode2 = cluster.simAddNode();
-    await = triggerFiredLatch.await(20000 / SPEED, TimeUnit.MILLISECONDS);
-    assertTrue("The trigger did not fire at all", await);
-    assertTrue("the listener should have recorded all events w/in a reasonable amount of time",
-               listenerEventLatch.await(10, TimeUnit.SECONDS));
+    assertTrue("trigger did not fire event after await()ing an excessive amount of time",
+               triggerFiredLatch.await(60, TimeUnit.SECONDS));
+    assertTrue("the listener didn't recorded all events even after await()ing an excessive amount of time",
+               listenerEventLatch.await(60, TimeUnit.SECONDS));
 
     // there must be exactly one SUCCEEDED event
     capturedEvents = listenerEvents.get("bar");
@@ -1326,7 +1320,8 @@ public class TestSimTriggerIntegration extends SimSolrCloudTestCase {
     CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection(COLL1,
         "conf", 1, 2);
     create.process(solrClient);
-    CloudTestUtils.waitForState(cluster, COLL1, 10, TimeUnit.SECONDS, CloudTestUtils.clusterShape(1, 2, false, true));
+    CloudTestUtils.waitForState(cluster, "searchRate testing collection creating",
+                                COLL1, CloudTestUtils.clusterShape(1, 2, false, true));
 
     listenerEventLatch = new CountDownLatch(4);
     
@@ -1365,13 +1360,13 @@ public class TestSimTriggerIntegration extends SimSolrCloudTestCase {
 
     cluster.getSimClusterStateProvider().simSetCollectionValue(COLL1, "QUERY./select.requestTimes:1minRate", 500, false, true);
 
-    boolean await = triggerStartedLatch.await(30000 / SPEED, TimeUnit.MILLISECONDS);
-    assertTrue("The trigger did not start in time", await);
-    await = triggerFinishedLatch.await(60000 / SPEED, TimeUnit.MILLISECONDS);
-    assertTrue("The trigger did not finish in time", await);
+    assertTrue("The trigger did not start even after await()ing an excessive amount of time",
+               triggerStartedLatch.await(60, TimeUnit.SECONDS));
+    assertTrue("The trigger did not finish even after await()ing an excessive amount of time",
+               triggerFinishedLatch.await(60, TimeUnit.SECONDS));
 
-    assertTrue("the listener should have recorded all events w/in a reasonable amount of time",
-               listenerEventLatch.await(10, TimeUnit.SECONDS));
+    assertTrue("the listener didn't recorded all events even after await()ing an excessive amount of time",
+               listenerEventLatch.await(60, TimeUnit.SECONDS));
 
     List<CapturedEvent> events = new ArrayList<>(listenerEvents.get("srt"));
     assertNotNull("Could not find events for srt", events);