You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@geode.apache.org by bs...@apache.org on 2019/05/31 21:09:46 UTC

[geode] branch feature/GEODE-6823 created (now 49997f3)

This is an automated email from the ASF dual-hosted git repository.

bschuchardt pushed a change to branch feature/GEODE-6823
in repository https://gitbox.apache.org/repos/asf/geode.git.


      at 49997f3  GEODE-6823 Hang in ElderInitProcessor.init(

This branch includes the following new commits:

     new 49997f3  GEODE-6823 Hang in ElderInitProcessor.init(

The 1 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.



[geode] 01/01: GEODE-6823 Hang in ElderInitProcessor.init(

Posted by bs...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

bschuchardt pushed a commit to branch feature/GEODE-6823
in repository https://gitbox.apache.org/repos/asf/geode.git

commit 49997f3ea91109a8f8e17404f2fac5e7af2c19f3
Author: Bruce Schuchardt <bs...@pivotal.io>
AuthorDate: Fri May 31 13:55:19 2019 -0700

    GEODE-6823 Hang in ElderInitProcessor.init(
    
    This corrects elder init processing to use the isCloseInProgress to
    check for shutdown.  A coding error during refactoring caused it to check the
    isCloseInProgress() method, which did more than just return the value of
    the isCloseInProgress variable and was incorrectly reporting a close in progress
    during startup operations.
    
    I've renamed the old isCloseInProgress() method to avoid similar coding
    errors in the future and added a new implementation that merely returns
    the value of the field, as you'd expect it to do.
    
    While writing tests I found that the ClusterElderManagerTest was leaving
    blocked threads behind because the waitForElder() method in
    ClusterElderManager was not interruptable.  I've changed that method to
    be interruptable.  We don't interrupt message-processing threads so this
    should be a safe change.
---
 .../internal/ClusterDistributionManager.java       | 27 +++++++-------
 .../distributed/internal/ClusterElderManager.java  | 27 ++++++--------
 .../distributed/internal/DistributionManager.java  |  2 +-
 .../internal/locks/ElderInitProcessor.java         | 38 +++++++++++---------
 .../internal/locks/GrantorRequestProcessor.java    | 17 +++++++--
 .../membership/gms/mgr/GMSMembershipManager.java   |  2 +-
 .../internal/ClusterElderManagerTest.java          | 41 +++++++++++++++++-----
 7 files changed, 94 insertions(+), 60 deletions(-)

diff --git a/geode-core/src/main/java/org/apache/geode/distributed/internal/ClusterDistributionManager.java b/geode-core/src/main/java/org/apache/geode/distributed/internal/ClusterDistributionManager.java
index a8d1c08..22d714b 100644
--- a/geode-core/src/main/java/org/apache/geode/distributed/internal/ClusterDistributionManager.java
+++ b/geode-core/src/main/java/org/apache/geode/distributed/internal/ClusterDistributionManager.java
@@ -968,7 +968,7 @@ public class ClusterDistributionManager implements DistributionManager {
       throw err;
     } catch (Throwable t) {
       SystemFailure.checkFailure();
-      if (isCloseInProgress()) {
+      if (shouldInhibitMembershipWarnings()) {
         logger.debug("Caught unusual exception during shutdown: {}", t.getMessage(), t);
       } else {
         logger.warn("Task failed with exception", t);
@@ -1975,18 +1975,18 @@ public class ClusterDistributionManager implements DistributionManager {
     }
   }
 
-  /**
-   * Returns true if this DM or the DistributedSystem owned by it is closing or is closed.
-   */
-  boolean isCloseInProgress() {
-    if (closeInProgress) {
+  private boolean shouldInhibitMembershipWarnings() {
+    if (isCloseInProgress()) {
       return true;
     }
     InternalDistributedSystem ds = getSystem();
     return ds != null && ds.isDisconnecting();
   }
 
-  public boolean isShutdownStarted() {
+  /**
+   * Returns true if this distribution manager has initiated shutdown
+   */
+  public boolean isCloseInProgress() {
     return closeInProgress;
   }
 
@@ -2051,7 +2051,7 @@ public class ClusterDistributionManager implements DistributionManager {
               membershipEventQueue.take();
           handleMemberEvent(ev);
         } catch (InterruptedException e) {
-          if (isCloseInProgress()) {
+          if (shouldInhibitMembershipWarnings()) {
             if (logger.isTraceEnabled()) {
               logger.trace("MemberEventInvoker: InterruptedException during shutdown");
             }
@@ -2062,7 +2062,7 @@ public class ClusterDistributionManager implements DistributionManager {
         } catch (DistributedSystemDisconnectedException e) {
           break;
         } catch (CancelException e) {
-          if (isCloseInProgress()) {
+          if (shouldInhibitMembershipWarnings()) {
             if (logger.isTraceEnabled()) {
               logger.trace("MemberEventInvoker: cancelled");
             }
@@ -2653,7 +2653,7 @@ public class ClusterDistributionManager implements DistributionManager {
         stats.incNodes(-1);
       }
       String msg;
-      if (p_crashed && !isCloseInProgress()) {
+      if (p_crashed && !shouldInhibitMembershipWarnings()) {
         msg =
             "Member at {} unexpectedly left the distributed cache: {}";
         addMemberEvent(new MemberCrashedEvent(theId, p_reason));
@@ -2881,7 +2881,7 @@ public class ClusterDistributionManager implements DistributionManager {
   }
 
   @Override
-  public ElderState getElderState(boolean waitToBecomeElder) {
+  public ElderState getElderState(boolean waitToBecomeElder) throws InterruptedException {
     return clusterElderManager.getElderState(waitToBecomeElder);
   }
 
@@ -2890,7 +2890,8 @@ public class ClusterDistributionManager implements DistributionManager {
    *
    * @return true if newElder is the elder; false if it is no longer a member or we are the elder.
    */
-  public boolean waitForElder(final InternalDistributedMember desiredElder) {
+  public boolean waitForElder(final InternalDistributedMember desiredElder)
+      throws InterruptedException {
 
     return clusterElderManager.waitForElder(desiredElder);
   }
@@ -3476,7 +3477,7 @@ public class ClusterDistributionManager implements DistributionManager {
         try {
           handleEvent(manager, listener);
         } catch (CancelException e) {
-          if (manager.isCloseInProgress()) {
+          if (manager.shouldInhibitMembershipWarnings()) {
             if (logger.isTraceEnabled()) {
               logger.trace("MemberEventInvoker: cancelled");
             }
diff --git a/geode-core/src/main/java/org/apache/geode/distributed/internal/ClusterElderManager.java b/geode-core/src/main/java/org/apache/geode/distributed/internal/ClusterElderManager.java
index f18ea45..debe3e1 100644
--- a/geode-core/src/main/java/org/apache/geode/distributed/internal/ClusterElderManager.java
+++ b/geode-core/src/main/java/org/apache/geode/distributed/internal/ClusterElderManager.java
@@ -82,7 +82,7 @@ public class ClusterElderManager {
     return clusterDistributionManager.getId().equals(getElderCandidate());
   }
 
-  public ElderState getElderState(boolean waitToBecomeElder) {
+  public ElderState getElderState(boolean waitToBecomeElder) throws InterruptedException {
     if (waitToBecomeElder) {
       // This should always return true.
       waitForElder(clusterDistributionManager.getId());
@@ -121,22 +121,16 @@ public class ClusterElderManager {
    * @return true if desiredElder is the elder; false if it is no longer a member or the local
    *         member is the elder
    */
-  public boolean waitForElder(final InternalDistributedMember desiredElder) {
+  public boolean waitForElder(final InternalDistributedMember desiredElder)
+      throws InterruptedException {
     MembershipChangeListener changeListener =
         new MembershipChangeListener();
 
     clusterDistributionManager.addMembershipListener(changeListener);
 
-    boolean interrupted = false;
     InternalDistributedMember currentElder;
 
     try {
-      if (logger.isDebugEnabled()) {
-        currentElder = getElderCandidate();
-        logger.debug("Waiting for Elder to change. Expecting Elder to be {}, is {}.",
-            desiredElder, currentElder);
-      }
-
       while (true) {
         if (clusterDistributionManager.isCloseInProgress()) {
           return false;
@@ -145,6 +139,10 @@ public class ClusterElderManager {
         if (desiredElder.equals(currentElder)) {
           return true;
         }
+        if (logger.isDebugEnabled()) {
+          logger.debug("Expecting Elder to be {} but it is {}.",
+              desiredElder, currentElder);
+        }
         if (!clusterDistributionManager.isCurrentMember(desiredElder)) {
           return false; // no longer present
         }
@@ -155,18 +153,13 @@ public class ClusterElderManager {
           return false;
         }
 
-        try {
-          changeListener.waitForMembershipChange();
-        } catch (InterruptedException e) {
-          interrupted = true;
+        if (logger.isDebugEnabled()) {
+          logger.debug("Waiting for membership to change");
         }
+        changeListener.waitForMembershipChange();
       }
     } finally {
       clusterDistributionManager.removeMembershipListener(changeListener);
-
-      if (interrupted) {
-        Thread.currentThread().interrupt();
-      }
     }
   }
 
diff --git a/geode-core/src/main/java/org/apache/geode/distributed/internal/DistributionManager.java b/geode-core/src/main/java/org/apache/geode/distributed/internal/DistributionManager.java
index 0dd6330..2e52900 100644
--- a/geode-core/src/main/java/org/apache/geode/distributed/internal/DistributionManager.java
+++ b/geode-core/src/main/java/org/apache/geode/distributed/internal/DistributionManager.java
@@ -166,7 +166,7 @@ public interface DistributionManager extends ReplySender {
    * @throws IllegalStateException if elder try lock fails
    * @since GemFire 4.0
    */
-  ElderState getElderState(boolean force);
+  ElderState getElderState(boolean force) throws InterruptedException;
 
   /**
    * Returns the membership port of the underlying distribution manager used for communication.
diff --git a/geode-core/src/main/java/org/apache/geode/distributed/internal/locks/ElderInitProcessor.java b/geode-core/src/main/java/org/apache/geode/distributed/internal/locks/ElderInitProcessor.java
index a693f9f..8c56e2f 100644
--- a/geode-core/src/main/java/org/apache/geode/distributed/internal/locks/ElderInitProcessor.java
+++ b/geode-core/src/main/java/org/apache/geode/distributed/internal/locks/ElderInitProcessor.java
@@ -169,21 +169,27 @@ public class ElderInitProcessor extends ReplyProcessor21 {
       ArrayList grantorVersions = new ArrayList(); // grantor versions
       ArrayList grantorSerialNumbers = new ArrayList(); // serial numbers of grantor svcs
       ArrayList nonGrantors = new ArrayList(); // svc names non-grantor for
-      if (dm.waitForElder(this.getSender())) {
-        GrantorRequestProcessor.readyForElderRecovery(dm.getSystem(), this.getSender(), null);
-        DLockService.recoverRmtElder(grantors, grantorVersions, grantorSerialNumbers, nonGrantors);
-        reply(dm, grantors, grantorVersions, grantorSerialNumbers, nonGrantors);
-      } else if (dm.getOtherNormalDistributionManagerIds().isEmpty()) {
-        // Either we're alone (and received a message from an unknown member) or else we haven't
-        // yet processed a view. In either case, we clearly don't have any grantors,
-        // so we return empty lists.
-
-        logger.info(LogMarker.DLS_MARKER,
-            "{}: returning empty lists because I know of no other members.",
-            this);
-        reply(dm, grantors, grantorVersions, grantorSerialNumbers, nonGrantors);
-      } else {
-        logger.info(LogMarker.DLS_MARKER, "{}: disregarding request from departed member.", this);
+      try {
+        if (dm.waitForElder(this.getSender())) {
+          GrantorRequestProcessor.readyForElderRecovery(dm.getSystem(), this.getSender(), null);
+          DLockService
+              .recoverRmtElder(grantors, grantorVersions, grantorSerialNumbers, nonGrantors);
+          reply(dm, grantors, grantorVersions, grantorSerialNumbers, nonGrantors);
+        } else if (dm.getOtherNormalDistributionManagerIds().isEmpty()) {
+          // Either we're alone (and received a message from an unknown member) or else we haven't
+          // yet processed a view. In either case, we clearly don't have any grantors,
+          // so we return empty lists.
+
+          logger.info(LogMarker.DLS_MARKER,
+              "{}: returning empty lists because I know of no other members.",
+              this);
+          reply(dm, grantors, grantorVersions, grantorSerialNumbers, nonGrantors);
+        } else {
+          logger.info(LogMarker.DLS_MARKER, "{}: disregarding request from departed member.", this);
+        }
+      } catch (InterruptedException e) {
+        // shutting down
+        logger.info("Elder initialization interrupted - will not send a reply");
       }
     }
 
@@ -207,7 +213,7 @@ public class ElderInitProcessor extends ReplyProcessor21 {
     @Override
     public String toString() {
       StringBuffer buff = new StringBuffer();
-      buff.append("ElderInitMessage (processorId='").append(this.processorId).append(")");
+      buff.append("ElderInitMessage (processorId=").append(this.processorId).append(")");
       return buff.toString();
     }
   }
diff --git a/geode-core/src/main/java/org/apache/geode/distributed/internal/locks/GrantorRequestProcessor.java b/geode-core/src/main/java/org/apache/geode/distributed/internal/locks/GrantorRequestProcessor.java
index cc8dbee..85ce578 100644
--- a/geode-core/src/main/java/org/apache/geode/distributed/internal/locks/GrantorRequestProcessor.java
+++ b/geode-core/src/main/java/org/apache/geode/distributed/internal/locks/GrantorRequestProcessor.java
@@ -209,7 +209,8 @@ public class GrantorRequestProcessor extends ReplyProcessor21 {
    * Sets currentElder to the memberId of the current elder if elder is remote; null if elder is in
    * our vm.
    */
-  private static ElderState startElderCall(InternalDistributedSystem sys, DLockService dls) {
+  private static ElderState startElderCall(InternalDistributedSystem sys, DLockService dls)
+      throws InterruptedException {
     InternalDistributedMember elder;
     ElderState es = null;
 
@@ -328,7 +329,12 @@ public class GrantorRequestProcessor extends ReplyProcessor21 {
     try {
       do {
         tryNewElder = false;
-        final ElderState es = startElderCall(system, service);
+        ElderState es = null;
+        try {
+          es = startElderCall(system, service);
+        } catch (InterruptedException e) {
+          interrupted = true;
+        }
         dm.throwIfDistributionStopped();
         try {
           if (es != null) {
@@ -491,7 +497,12 @@ public class GrantorRequestProcessor extends ReplyProcessor21 {
 
     protected void basicProcess(final DistributionManager dm) {
       // we should be in the elder
-      ElderState es = dm.getElderState(true);
+      ElderState es = null;
+      try {
+        dm.getElderState(true);
+      } catch (InterruptedException e) {
+        logger.info("Interrupted while processing {}", this);
+      }
       switch (this.opCode) {
         case GET_OP:
           replyGrantorInfo(dm, es.getGrantor(this.serviceName, getSender(), this.dlsSerialNumber));
diff --git a/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/gms/mgr/GMSMembershipManager.java b/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/gms/mgr/GMSMembershipManager.java
index f9352ea..46110e2 100644
--- a/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/gms/mgr/GMSMembershipManager.java
+++ b/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/gms/mgr/GMSMembershipManager.java
@@ -2620,7 +2620,7 @@ public class GMSMembershipManager implements MembershipManager, Manager {
   @Override
   public boolean isShutdownStarted() {
     ClusterDistributionManager dm = listener.getDM();
-    return shutdownInProgress || (dm != null && dm.isShutdownStarted());
+    return shutdownInProgress || (dm != null && dm.isCloseInProgress());
   }
 
   @Override
diff --git a/geode-core/src/test/java/org/apache/geode/distributed/internal/ClusterElderManagerTest.java b/geode-core/src/test/java/org/apache/geode/distributed/internal/ClusterElderManagerTest.java
index 8f9cad9..48b0a1f 100644
--- a/geode-core/src/test/java/org/apache/geode/distributed/internal/ClusterElderManagerTest.java
+++ b/geode-core/src/test/java/org/apache/geode/distributed/internal/ClusterElderManagerTest.java
@@ -123,7 +123,7 @@ public class ClusterElderManagerTest {
   }
 
   @Test
-  public void waitForElderReturnsTrueIfAnotherMemberIsElder() {
+  public void waitForElderReturnsTrueIfAnotherMemberIsElder() throws InterruptedException {
     ClusterElderManager clusterElderManager = new ClusterElderManager(clusterDistributionManager);
     when(clusterDistributionManager.getId()).thenReturn(member0);
     when(clusterDistributionManager.getViewMembers()).thenReturn(Arrays.asList(member1, member0));
@@ -131,7 +131,7 @@ public class ClusterElderManagerTest {
   }
 
   @Test
-  public void waitForElderReturnsFalseIfWeAreElder() {
+  public void waitForElderReturnsFalseIfWeAreElder() throws InterruptedException {
     ClusterElderManager clusterElderManager = new ClusterElderManager(clusterDistributionManager);
     when(clusterDistributionManager.getId()).thenReturn(member0);
     when(clusterDistributionManager.isCurrentMember(eq(member1))).thenReturn(true);
@@ -140,7 +140,8 @@ public class ClusterElderManagerTest {
   }
 
   @Test
-  public void waitForElderReturnsFalseIfDesiredElderIsNotACurrentMember() {
+  public void waitForElderReturnsFalseIfDesiredElderIsNotACurrentMember()
+      throws InterruptedException {
     ClusterElderManager clusterElderManager = new ClusterElderManager(clusterDistributionManager);
     when(clusterDistributionManager.getId()).thenReturn(member0);
     when(clusterDistributionManager.getViewMembers())
@@ -154,8 +155,24 @@ public class ClusterElderManagerTest {
     when(clusterDistributionManager.getId()).thenReturn(member0);
     when(clusterDistributionManager.getViewMembers()).thenReturn(Arrays.asList(member1, member0));
     when(clusterDistributionManager.isCurrentMember(eq(member0))).thenReturn(true);
+    when(clusterDistributionManager.isCloseInProgress()).thenReturn(false);
+
+    assertThatInterruptableRunnableWaits(() -> {
+      try {
+        clusterElderManager.waitForElder(member0);
+      } catch (InterruptedException e) {
+      }
+    });
+  }
 
-    assertThatRunnableWaits(() -> clusterElderManager.waitForElder(member0));
+  @Test
+  public void waitForElderDoesNotWaitIfShuttingDown() throws InterruptedException {
+    ClusterElderManager clusterElderManager = new ClusterElderManager(clusterDistributionManager);
+    when(clusterDistributionManager.getId()).thenReturn(member0);
+    when(clusterDistributionManager.getViewMembers()).thenReturn(Arrays.asList(member1, member0));
+    when(clusterDistributionManager.isCurrentMember(eq(member0))).thenReturn(true);
+    when(clusterDistributionManager.isCloseInProgress()).thenReturn(true);
+    assertThat(clusterElderManager.waitForElder(member0)).isFalse();
   }
 
   @Test
@@ -193,7 +210,7 @@ public class ClusterElderManagerTest {
   }
 
   @Test
-  public void getElderStateAsElder() {
+  public void getElderStateAsElder() throws InterruptedException {
     Supplier<ElderState> elderStateSupplier = mock(Supplier.class);
     ElderState elderState = mock(ElderState.class);
     when(elderStateSupplier.get()).thenReturn(elderState);
@@ -207,7 +224,7 @@ public class ClusterElderManagerTest {
   }
 
   @Test
-  public void getElderStateGetsBuiltOnceAsElder() {
+  public void getElderStateGetsBuiltOnceAsElder() throws InterruptedException {
     Supplier<ElderState> elderStateSupplier = mock(Supplier.class);
     ElderState elderState = mock(ElderState.class);
     when(elderStateSupplier.get()).thenReturn(elderState);
@@ -244,7 +261,7 @@ public class ClusterElderManagerTest {
   }
 
   @Test
-  public void getElderStateNotAsElder() {
+  public void getElderStateNotAsElder() throws InterruptedException {
     Supplier<ElderState> elderStateSupplier = mock(Supplier.class);
     ClusterElderManager clusterElderManager =
         new ClusterElderManager(clusterDistributionManager, elderStateSupplier);
@@ -264,12 +281,17 @@ public class ClusterElderManagerTest {
     when(clusterDistributionManager.getViewMembers()).thenReturn(Arrays.asList(member1, member0));
     when(clusterDistributionManager.isCurrentMember(eq(member0))).thenReturn(true);
 
-    assertThatRunnableWaits(() -> clusterElderManager.getElderState(true));
+    assertThatInterruptableRunnableWaits(() -> {
+      try {
+        clusterElderManager.getElderState(true);
+      } catch (InterruptedException e) {
+      }
+    });
 
     verify(elderStateSupplier, times(0)).get();
   }
 
-  private void assertThatRunnableWaits(Runnable runnable) {
+  private void assertThatInterruptableRunnableWaits(Runnable runnable) {
     Thread waitThread = new Thread(runnable);
 
     waitThread.start();
@@ -281,6 +303,7 @@ public class ClusterElderManagerTest {
           .until(() -> waitingStates.contains(waitThread.getState()));
     } finally {
       waitThread.interrupt();
+      await().until(() -> !waitThread.isAlive());
     }
   }
 }