You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@geode.apache.org by bs...@apache.org on 2019/01/03 20:59:13 UTC

[geode] 01/01: GEODE-6244 Healthy member kicked out by Sick member

This is an automated email from the ASF dual-hosted git repository.

bschuchardt pushed a commit to branch feature/GEODE-6244
in repository https://gitbox.apache.org/repos/asf/geode.git

commit 25134b19e2a324ff04c3a3d1139bafe641031729
Author: Bruce Schuchardt <bs...@pivotal.io>
AuthorDate: Thu Jan 3 12:49:56 2019 -0800

    GEODE-6244 Healthy member kicked out by Sick member
    
    GMSMembershipManager.verifyMember() should not initiate direct removal
    of the target member if an availability check fails.  Instead it should
    initiate suspect processing.
    
    This adds new unit tests for GMSHealthMonitor.checkIfAvailable() and
    changes the availability check to initiate suspect processing if the
    check fails.
---
 .../gms/fd/GMSHealthMonitorJUnitTest.java          | 34 ++++++++++++++++++++++
 .../membership/gms/fd/GMSHealthMonitor.java        |  7 ++---
 .../membership/gms/mgr/GMSMembershipManager.java   |  2 +-
 3 files changed, 38 insertions(+), 5 deletions(-)

diff --git a/geode-core/src/integrationTest/java/org/apache/geode/distributed/internal/membership/gms/fd/GMSHealthMonitorJUnitTest.java b/geode-core/src/integrationTest/java/org/apache/geode/distributed/internal/membership/gms/fd/GMSHealthMonitorJUnitTest.java
index 8001aed..baab496 100644
--- a/geode-core/src/integrationTest/java/org/apache/geode/distributed/internal/membership/gms/fd/GMSHealthMonitorJUnitTest.java
+++ b/geode-core/src/integrationTest/java/org/apache/geode/distributed/internal/membership/gms/fd/GMSHealthMonitorJUnitTest.java
@@ -615,6 +615,40 @@ public class GMSHealthMonitorJUnitTest {
     assertTrue(gmsHealthMonitor.isSuspectMember(memberToCheck));
   }
 
+  /**
+   * a failed availability check should initiate suspect processing
+   */
+  @Test
+  public void testFailedCheckIfAvailableDoesNotRemoveMember() {
+    NetView v = installAView();
+
+    setFailureDetectionPorts(v);
+
+    InternalDistributedMember memberToCheck = gmsHealthMonitor.getNextNeighbor();
+    boolean available = gmsHealthMonitor.checkIfAvailable(memberToCheck, "Not responding", false);
+    assertFalse(available);
+    verify(joinLeave, never()).remove(isA(InternalDistributedMember.class), isA(String.class));
+    assertTrue(gmsHealthMonitor.isSuspectMember(memberToCheck));
+    verify(messenger).send(isA(SuspectMembersMessage.class));
+  }
+
+
+  /**
+   * Same as the test above, but with a request to initiate removal of the member
+   */
+  @Test
+  public void testFailedCheckIfAvailableRemovesMember() {
+    NetView v = installAView();
+
+    setFailureDetectionPorts(v);
+
+    InternalDistributedMember memberToCheck = gmsHealthMonitor.getNextNeighbor();
+    boolean available = gmsHealthMonitor.checkIfAvailable(memberToCheck, "Not responding", true);
+    assertFalse(available);
+    verify(joinLeave).remove(isA(InternalDistributedMember.class), isA(String.class));
+  }
+
+
 
   @Test
   public void testShutdown() {
diff --git a/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/gms/fd/GMSHealthMonitor.java b/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/gms/fd/GMSHealthMonitor.java
index d5e5a39..cccf285 100644
--- a/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/gms/fd/GMSHealthMonitor.java
+++ b/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/gms/fd/GMSHealthMonitor.java
@@ -436,10 +436,7 @@ public class GMSHealthMonitor implements HealthMonitor, MessageHandler {
     if (services.getJoinLeave().isMemberLeaving(mbr)) {
       return;
     }
-    SuspectRequest sr = new SuspectRequest(mbr, reason);
-    List<SuspectRequest> sl = new ArrayList<>();
-    sl.add(sr);
-    sendSuspectRequest(sl);
+    sendSuspectRequest(Collections.singletonList(new SuspectRequest(mbr, reason)));
   }
 
   /**
@@ -1278,6 +1275,8 @@ public class GMSHealthMonitor implements HealthMonitor, MessageHandler {
           if (initiateRemoval) {
             logger.info("Requesting removal of suspect member {}", mbr);
             services.getJoinLeave().remove(mbr, reason);
+          } else {
+            initiateSuspicion(mbr, reason);
           }
           // make sure it is still suspected
           memberSuspected(localAddress, mbr, reason);
diff --git a/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/gms/mgr/GMSMembershipManager.java b/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/gms/mgr/GMSMembershipManager.java
index 5eb36fd..c022b30 100644
--- a/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/gms/mgr/GMSMembershipManager.java
+++ b/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/gms/mgr/GMSMembershipManager.java
@@ -1649,7 +1649,7 @@ public class GMSMembershipManager implements MembershipManager, Manager {
    */
   public boolean verifyMember(DistributedMember mbr, String reason) {
     return mbr != null && memberExists(mbr)
-        && this.services.getHealthMonitor().checkIfAvailable(mbr, reason, true);
+        && this.services.getHealthMonitor().checkIfAvailable(mbr, reason, false);
   }
 
   /**