You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@geode.apache.org by bs...@apache.org on 2018/08/14 20:25:47 UTC

[geode] branch feature/GEODE-3780 updated: GEODE-3780 suspected member is never watched again after passing final check

This is an automated email from the ASF dual-hosted git repository.

bschuchardt pushed a commit to branch feature/GEODE-3780
in repository https://gitbox.apache.org/repos/asf/geode.git


The following commit(s) were added to refs/heads/feature/GEODE-3780 by this push:
     new 38b75a9  GEODE-3780 suspected member is never watched again after passing final check
38b75a9 is described below

commit 38b75a90b2164c0dfd3deb8ef21b059befc9168b
Author: Bruce Schuchardt <bs...@pivotal.io>
AuthorDate: Tue Aug 14 13:23:56 2018 -0700

    GEODE-3780 suspected member is never watched again after passing final check
    
    Changes to address Darrel's review comments
---
 .../internal/membership/gms/ServiceConfig.java     | 27 ++++------------------
 .../membership/gms/fd/GMSHealthMonitor.java        | 13 +++++++----
 .../membership/gms/membership/GMSJoinLeave.java    |  4 +++-
 3 files changed, 15 insertions(+), 29 deletions(-)

diff --git a/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/gms/ServiceConfig.java b/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/gms/ServiceConfig.java
index 395e94d..09e2ed8 100644
--- a/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/gms/ServiceConfig.java
+++ b/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/gms/ServiceConfig.java
@@ -27,11 +27,12 @@ public class ServiceConfig {
   public static final long MEMBER_REQUEST_COLLECTION_INTERVAL =
       Long.getLong(DistributionConfig.GEMFIRE_PREFIX + "member-request-collection-interval", 300);
 
+  /** in a small cluster we might want to involve all members in operations */
+  public static final int SMALL_CLUSTER_SIZE = 9;
+
   /** various settings from Geode configuration */
   private final long joinTimeout;
   private final int[] membershipPortRange;
-  private final int udpRecvBufferSize;
-  private final int udpSendBufferSize;
   private final long memberTimeout;
   private Integer lossThreshold;
   private final Integer memberWeight;
@@ -79,12 +80,8 @@ public class ServiceConfig {
     return networkPartitionDetectionEnabled;
   }
 
-  public void setNetworkPartitionDetectionEnabled(boolean enabled) {
-    this.networkPartitionDetectionEnabled = enabled;
-  }
-
   public boolean areLocatorsPreferredAsCoordinators() {
-    boolean locatorsAreCoordinators = false;
+    boolean locatorsAreCoordinators;
 
     if (networkPartitionDetectionEnabled) {
       locatorsAreCoordinators = true;
@@ -139,24 +136,8 @@ public class ServiceConfig {
 
     membershipPortRange = theConfig.getMembershipPortRange();
 
-    udpRecvBufferSize = DistributionConfig.DEFAULT_UDP_RECV_BUFFER_SIZE_REDUCED;
-    udpSendBufferSize = theConfig.getUdpSendBufferSize();
-
     memberTimeout = theConfig.getMemberTimeout();
 
-    // The default view-ack timeout in 7.0 is 12347 ms but is adjusted based on the member-timeout.
-    // We don't want a longer timeout than 12437 because new members will likely time out trying to
-    // connect because their join timeouts are set to expect a shorter period
-    int ackCollectionTimeout = theConfig.getMemberTimeout() * 2 * 12437 / 10000;
-    if (ackCollectionTimeout < 1500) {
-      ackCollectionTimeout = 1500;
-    } else if (ackCollectionTimeout > 12437) {
-      ackCollectionTimeout = 12437;
-    }
-    ackCollectionTimeout = Integer
-        .getInteger(DistributionConfig.GEMFIRE_PREFIX + "VIEW_ACK_TIMEOUT", ackCollectionTimeout)
-        .intValue();
-
     lossThreshold =
         Integer.getInteger(DistributionConfig.GEMFIRE_PREFIX + "network-partition-threshold", 51);
     if (lossThreshold < 51)
diff --git a/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/gms/fd/GMSHealthMonitor.java b/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/gms/fd/GMSHealthMonitor.java
index d66155b..f31a0c3 100644
--- a/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/gms/fd/GMSHealthMonitor.java
+++ b/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/gms/fd/GMSHealthMonitor.java
@@ -66,6 +66,7 @@ import org.apache.geode.distributed.internal.InternalDistributedSystem;
 import org.apache.geode.distributed.internal.membership.InternalDistributedMember;
 import org.apache.geode.distributed.internal.membership.NetView;
 import org.apache.geode.distributed.internal.membership.gms.GMSMember;
+import org.apache.geode.distributed.internal.membership.gms.ServiceConfig;
 import org.apache.geode.distributed.internal.membership.gms.Services;
 import org.apache.geode.distributed.internal.membership.gms.interfaces.HealthMonitor;
 import org.apache.geode.distributed.internal.membership.gms.interfaces.MessageHandler;
@@ -1001,8 +1002,8 @@ public class GMSHealthMonitor implements HealthMonitor, MessageHandler {
   @Override
   public void memberSuspected(InternalDistributedMember initiator,
       InternalDistributedMember suspect, String reason) {
-    suspectedMemberIds.putIfAbsent(suspect, currentView);
     synchronized (suspectRequestsInView) {
+      suspectedMemberIds.putIfAbsent(suspect, currentView);
       Collection<SuspectRequest> requests = suspectRequestsInView.get(currentView);
       boolean found = false;
       if (requests == null) {
@@ -1022,9 +1023,10 @@ public class GMSHealthMonitor implements HealthMonitor, MessageHandler {
   }
 
   private void memberUnsuspected(InternalDistributedMember mbr) {
-    logger.info("No longer suspecting {}", mbr);
-    suspectedMemberIds.remove(mbr);
     synchronized (suspectRequestsInView) {
+      if (suspectedMemberIds.remove(mbr) != null) {
+        logger.info("No longer suspecting {}", mbr);
+      }
       Collection<SuspectRequest> suspectRequests = suspectRequestsInView.get(currentView);
       if (suspectRequests != null) {
         Collection<SuspectRequest> removals = new ArrayList<>(suspectRequests.size());
@@ -1361,7 +1363,7 @@ public class GMSHealthMonitor implements HealthMonitor, MessageHandler {
   private void sendSuspectRequest(final List<SuspectRequest> requests) {
     logger.debug("Sending suspect request for members {}", requests);
     List<InternalDistributedMember> recipients;
-    if (currentView.size() > 9) {
+    if (currentView.size() > ServiceConfig.SMALL_CLUSTER_SIZE) {
       HashSet<InternalDistributedMember> filter = new HashSet<>();
       for (Enumeration<InternalDistributedMember> e = suspectedMemberIds.keys(); e
           .hasMoreElements();) {
@@ -1370,7 +1372,8 @@ public class GMSHealthMonitor implements HealthMonitor, MessageHandler {
       filter.addAll(
           requests.stream().map(SuspectRequest::getSuspectMember).collect(Collectors.toList()));
       recipients =
-          currentView.getPreferredCoordinators(filter, services.getJoinLeave().getMemberID(), 10);
+          currentView.getPreferredCoordinators(filter, services.getJoinLeave().getMemberID(),
+              ServiceConfig.SMALL_CLUSTER_SIZE + 1);
     } else {
       recipients = currentView.getMembers();
     }
diff --git a/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/gms/membership/GMSJoinLeave.java b/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/gms/membership/GMSJoinLeave.java
index 45b17bf..7b257ef 100644
--- a/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/gms/membership/GMSJoinLeave.java
+++ b/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/gms/membership/GMSJoinLeave.java
@@ -65,6 +65,7 @@ import org.apache.geode.distributed.internal.membership.NetMember;
 import org.apache.geode.distributed.internal.membership.NetView;
 import org.apache.geode.distributed.internal.membership.gms.GMSMember;
 import org.apache.geode.distributed.internal.membership.gms.GMSUtil;
+import org.apache.geode.distributed.internal.membership.gms.ServiceConfig;
 import org.apache.geode.distributed.internal.membership.gms.Services;
 import org.apache.geode.distributed.internal.membership.gms.interfaces.JoinLeave;
 import org.apache.geode.distributed.internal.membership.gms.interfaces.MessageHandler;
@@ -1660,7 +1661,8 @@ public class GMSJoinLeave implements JoinLeave, MessageHandler {
       processRemoveRequest(msg);
       if (!this.isCoordinator) {
         msg.resetRecipients();
-        msg.setRecipients(v.getPreferredCoordinators(Collections.emptySet(), localAddress, 10));
+        msg.setRecipients(v.getPreferredCoordinators(Collections.emptySet(), localAddress,
+            ServiceConfig.SMALL_CLUSTER_SIZE + 1));
         services.getMessenger().send(msg);
       }
     } else {