You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by sh...@apache.org on 2018/04/02 07:38:02 UTC

[15/34] lucene-solr:jira/solr-12095: SOLR-12133: Fix failures in TriggerIntegrationTest.testEventQueue due to race conditions

SOLR-12133: Fix failures in TriggerIntegrationTest.testEventQueue due to race conditions


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/83cca5cd
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/83cca5cd
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/83cca5cd

Branch: refs/heads/jira/solr-12095
Commit: 83cca5cd06aa0fdd9ce98415d6e9a0b1ecdfc7f5
Parents: ae6d29f
Author: Shalin Shekhar Mangar <sh...@apache.org>
Authored: Fri Mar 30 16:42:35 2018 +0530
Committer: Shalin Shekhar Mangar <sh...@apache.org>
Committed: Fri Mar 30 16:42:35 2018 +0530

----------------------------------------------------------------------
 .../cloud/autoscaling/ScheduledTriggers.java    | 13 +++++++++
 .../autoscaling/TriggerIntegrationTest.java     | 28 +++++++++++---------
 .../apache/solr/common/util/ExecutorUtil.java   |  4 +++
 3 files changed, 32 insertions(+), 13 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/83cca5cd/solr/core/src/java/org/apache/solr/cloud/autoscaling/ScheduledTriggers.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/cloud/autoscaling/ScheduledTriggers.java b/solr/core/src/java/org/apache/solr/cloud/autoscaling/ScheduledTriggers.java
index 0e21b04..28efe92 100644
--- a/solr/core/src/java/org/apache/solr/cloud/autoscaling/ScheduledTriggers.java
+++ b/solr/core/src/java/org/apache/solr/cloud/autoscaling/ScheduledTriggers.java
@@ -66,6 +66,7 @@ import static org.apache.solr.common.params.AutoScalingParams.ACTION_THROTTLE_PE
 import static org.apache.solr.common.params.AutoScalingParams.TRIGGER_COOLDOWN_PERIOD_SECONDS;
 import static org.apache.solr.common.params.AutoScalingParams.TRIGGER_CORE_POOL_SIZE;
 import static org.apache.solr.common.params.AutoScalingParams.TRIGGER_SCHEDULE_DELAY_SECONDS;
+import static org.apache.solr.common.util.ExecutorUtil.awaitTermination;
 
 /**
  * Responsible for scheduling active triggers, starting and stopping them and
@@ -497,9 +498,21 @@ public class ScheduledTriggers implements Closeable {
     }
     // shutdown and interrupt all running tasks because there's no longer any
     // guarantee about cluster state
+    log.debug("Shutting down scheduled thread pool executor now");
     scheduledThreadPoolExecutor.shutdownNow();
+
+    log.debug("Shutting down action executor now");
     actionExecutor.shutdownNow();
+
     listeners.close();
+
+    log.debug("Awaiting termination for action executor");
+    awaitTermination(actionExecutor);
+
+    log.debug("Awaiting termination for scheduled thread pool executor");
+    awaitTermination(scheduledThreadPoolExecutor);
+
+    log.debug("ScheduledTriggers closed completely");
   }
 
   private class TriggerWrapper implements Runnable, Closeable {

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/83cca5cd/solr/core/src/test/org/apache/solr/cloud/autoscaling/TriggerIntegrationTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/TriggerIntegrationTest.java b/solr/core/src/test/org/apache/solr/cloud/autoscaling/TriggerIntegrationTest.java
index 5dfe34c..2902c48 100644
--- a/solr/core/src/test/org/apache/solr/cloud/autoscaling/TriggerIntegrationTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/autoscaling/TriggerIntegrationTest.java
@@ -64,15 +64,16 @@ import static org.apache.solr.common.cloud.ZkStateReader.SOLR_AUTOSCALING_CONF_P
 public class TriggerIntegrationTest extends SolrCloudTestCase {
   private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
 
-  private static CountDownLatch actionConstructorCalled;
-  private static CountDownLatch actionInitCalled;
-  private static CountDownLatch triggerFiredLatch;
-  private static int waitForSeconds = 1;
-  private static CountDownLatch actionStarted;
-  private static CountDownLatch actionInterrupted;
-  private static CountDownLatch actionCompleted;
+  private static volatile CountDownLatch actionConstructorCalled;
+  private static volatile CountDownLatch actionInitCalled;
+  private static volatile CountDownLatch triggerFiredLatch;
+  private static volatile int waitForSeconds = 1;
+  private static volatile CountDownLatch actionStarted;
+  private static volatile CountDownLatch actionInterrupted;
+  private static volatile CountDownLatch actionCompleted;
   private static AtomicBoolean triggerFired;
   private static Set<TriggerEvent> events = ConcurrentHashMap.newKeySet();
+  public static volatile long eventQueueActionWait = 5000;
   private static SolrCloudManager cloudManager;
 
   // use the same time source as triggers use
@@ -166,6 +167,7 @@ public class TriggerIntegrationTest extends SolrCloudTestCase {
     events.clear();
     listenerEvents.clear();
     lastActionExecutedAt.set(0);
+    eventQueueActionWait = 5000;
     while (cluster.getJettySolrRunners().size() < 2) {
       // perhaps a test stopped a node but didn't start it back
       // lets start a node
@@ -415,14 +417,17 @@ public class TriggerIntegrationTest extends SolrCloudTestCase {
     public void process(TriggerEvent event, ActionContext actionContext) {
       log.info("-- event: " + event);
       events.add(event);
+      long eventQueueActionWaitCopy = eventQueueActionWait;
       getActionStarted().countDown();
       try {
-        Thread.sleep(eventQueueActionWait);
+        log.info("-- Going to sleep for {} ms", eventQueueActionWaitCopy);
+        Thread.sleep(eventQueueActionWaitCopy);
+        log.info("-- Woke up after sleeping for {} ms", eventQueueActionWaitCopy);
         triggerFired.compareAndSet(false, true);
         getActionCompleted().countDown();
       } catch (InterruptedException e) {
+        log.info("-- Interrupted");
         getActionInterrupted().countDown();
-        return;
       }
     }
 
@@ -434,10 +439,7 @@ public class TriggerIntegrationTest extends SolrCloudTestCase {
     }
   }
 
-  public static long eventQueueActionWait = 5000;
-
   @Test
-  @BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028")
   public void testEventQueue() throws Exception {
     waitForSeconds = 1;
     CloudSolrClient solrClient = cluster.getSolrClient();
@@ -471,6 +473,7 @@ public class TriggerIntegrationTest extends SolrCloudTestCase {
     JettySolrRunner newNode = cluster.startJettySolrRunner();
     boolean await = actionStarted.await(60, TimeUnit.SECONDS);
     assertTrue("action did not start", await);
+    eventQueueActionWait = 1;
     // event should be there
     NodeAddedTrigger.NodeAddedEvent nodeAddedEvent = (NodeAddedTrigger.NodeAddedEvent) events.iterator().next();
     assertNotNull(nodeAddedEvent);
@@ -478,7 +481,6 @@ public class TriggerIntegrationTest extends SolrCloudTestCase {
     assertFalse(triggerFired.get());
     events.clear();
     actionStarted = new CountDownLatch(1);
-    eventQueueActionWait = 1;
     // kill overseer leader
     cluster.stopJettySolrRunner(overseerLeaderIndex);
     Thread.sleep(5000);

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/83cca5cd/solr/solrj/src/java/org/apache/solr/common/util/ExecutorUtil.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/java/org/apache/solr/common/util/ExecutorUtil.java b/solr/solrj/src/java/org/apache/solr/common/util/ExecutorUtil.java
index a045726..7458016 100644
--- a/solr/solrj/src/java/org/apache/solr/common/util/ExecutorUtil.java
+++ b/solr/solrj/src/java/org/apache/solr/common/util/ExecutorUtil.java
@@ -73,6 +73,10 @@ public class ExecutorUtil {
 
   public static void shutdownAndAwaitTermination(ExecutorService pool) {
     pool.shutdown(); // Disable new tasks from being submitted
+    awaitTermination(pool);
+  }
+
+  public static void awaitTermination(ExecutorService pool) {
     boolean shutdown = false;
     while (!shutdown) {
       try {