You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@geode.apache.org by bs...@apache.org on 2018/08/14 20:25:47 UTC
[geode] branch feature/GEODE-3780 updated: GEODE-3780 suspected member is never watched again after passing final check
This is an automated email from the ASF dual-hosted git repository.
bschuchardt pushed a commit to branch feature/GEODE-3780
in repository https://gitbox.apache.org/repos/asf/geode.git
The following commit(s) were added to refs/heads/feature/GEODE-3780 by this push:
new 38b75a9 GEODE-3780 suspected member is never watched again after passing final check
38b75a9 is described below
commit 38b75a90b2164c0dfd3deb8ef21b059befc9168b
Author: Bruce Schuchardt <bs...@pivotal.io>
AuthorDate: Tue Aug 14 13:23:56 2018 -0700
GEODE-3780 suspected member is never watched again after passing final check
Changes to address Darrel's review comments
---
.../internal/membership/gms/ServiceConfig.java | 27 ++++------------------
.../membership/gms/fd/GMSHealthMonitor.java | 13 +++++++----
.../membership/gms/membership/GMSJoinLeave.java | 4 +++-
3 files changed, 15 insertions(+), 29 deletions(-)
diff --git a/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/gms/ServiceConfig.java b/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/gms/ServiceConfig.java
index 395e94d..09e2ed8 100644
--- a/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/gms/ServiceConfig.java
+++ b/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/gms/ServiceConfig.java
@@ -27,11 +27,12 @@ public class ServiceConfig {
public static final long MEMBER_REQUEST_COLLECTION_INTERVAL =
Long.getLong(DistributionConfig.GEMFIRE_PREFIX + "member-request-collection-interval", 300);
+ /** in a small cluster we might want to involve all members in operations */
+ public static final int SMALL_CLUSTER_SIZE = 9;
+
/** various settings from Geode configuration */
private final long joinTimeout;
private final int[] membershipPortRange;
- private final int udpRecvBufferSize;
- private final int udpSendBufferSize;
private final long memberTimeout;
private Integer lossThreshold;
private final Integer memberWeight;
@@ -79,12 +80,8 @@ public class ServiceConfig {
return networkPartitionDetectionEnabled;
}
- public void setNetworkPartitionDetectionEnabled(boolean enabled) {
- this.networkPartitionDetectionEnabled = enabled;
- }
-
public boolean areLocatorsPreferredAsCoordinators() {
- boolean locatorsAreCoordinators = false;
+ boolean locatorsAreCoordinators;
if (networkPartitionDetectionEnabled) {
locatorsAreCoordinators = true;
@@ -139,24 +136,8 @@ public class ServiceConfig {
membershipPortRange = theConfig.getMembershipPortRange();
- udpRecvBufferSize = DistributionConfig.DEFAULT_UDP_RECV_BUFFER_SIZE_REDUCED;
- udpSendBufferSize = theConfig.getUdpSendBufferSize();
-
memberTimeout = theConfig.getMemberTimeout();
- // The default view-ack timeout in 7.0 is 12437 ms but is adjusted based on the member-timeout.
- // We don't want a longer timeout than 12437 because new members will likely time out trying to
- // connect because their join timeouts are set to expect a shorter period
- int ackCollectionTimeout = theConfig.getMemberTimeout() * 2 * 12437 / 10000;
- if (ackCollectionTimeout < 1500) {
- ackCollectionTimeout = 1500;
- } else if (ackCollectionTimeout > 12437) {
- ackCollectionTimeout = 12437;
- }
- ackCollectionTimeout = Integer
- .getInteger(DistributionConfig.GEMFIRE_PREFIX + "VIEW_ACK_TIMEOUT", ackCollectionTimeout)
- .intValue();
-
lossThreshold =
Integer.getInteger(DistributionConfig.GEMFIRE_PREFIX + "network-partition-threshold", 51);
if (lossThreshold < 51)
diff --git a/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/gms/fd/GMSHealthMonitor.java b/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/gms/fd/GMSHealthMonitor.java
index d66155b..f31a0c3 100644
--- a/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/gms/fd/GMSHealthMonitor.java
+++ b/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/gms/fd/GMSHealthMonitor.java
@@ -66,6 +66,7 @@ import org.apache.geode.distributed.internal.InternalDistributedSystem;
import org.apache.geode.distributed.internal.membership.InternalDistributedMember;
import org.apache.geode.distributed.internal.membership.NetView;
import org.apache.geode.distributed.internal.membership.gms.GMSMember;
+import org.apache.geode.distributed.internal.membership.gms.ServiceConfig;
import org.apache.geode.distributed.internal.membership.gms.Services;
import org.apache.geode.distributed.internal.membership.gms.interfaces.HealthMonitor;
import org.apache.geode.distributed.internal.membership.gms.interfaces.MessageHandler;
@@ -1001,8 +1002,8 @@ public class GMSHealthMonitor implements HealthMonitor, MessageHandler {
@Override
public void memberSuspected(InternalDistributedMember initiator,
InternalDistributedMember suspect, String reason) {
- suspectedMemberIds.putIfAbsent(suspect, currentView);
synchronized (suspectRequestsInView) {
+ suspectedMemberIds.putIfAbsent(suspect, currentView);
Collection<SuspectRequest> requests = suspectRequestsInView.get(currentView);
boolean found = false;
if (requests == null) {
@@ -1022,9 +1023,10 @@ public class GMSHealthMonitor implements HealthMonitor, MessageHandler {
}
private void memberUnsuspected(InternalDistributedMember mbr) {
- logger.info("No longer suspecting {}", mbr);
- suspectedMemberIds.remove(mbr);
synchronized (suspectRequestsInView) {
+ if (suspectedMemberIds.remove(mbr) != null) {
+ logger.info("No longer suspecting {}", mbr);
+ }
Collection<SuspectRequest> suspectRequests = suspectRequestsInView.get(currentView);
if (suspectRequests != null) {
Collection<SuspectRequest> removals = new ArrayList<>(suspectRequests.size());
@@ -1361,7 +1363,7 @@ public class GMSHealthMonitor implements HealthMonitor, MessageHandler {
private void sendSuspectRequest(final List<SuspectRequest> requests) {
logger.debug("Sending suspect request for members {}", requests);
List<InternalDistributedMember> recipients;
- if (currentView.size() > 9) {
+ if (currentView.size() > ServiceConfig.SMALL_CLUSTER_SIZE) {
HashSet<InternalDistributedMember> filter = new HashSet<>();
for (Enumeration<InternalDistributedMember> e = suspectedMemberIds.keys(); e
.hasMoreElements();) {
@@ -1370,7 +1372,8 @@ public class GMSHealthMonitor implements HealthMonitor, MessageHandler {
filter.addAll(
requests.stream().map(SuspectRequest::getSuspectMember).collect(Collectors.toList()));
recipients =
- currentView.getPreferredCoordinators(filter, services.getJoinLeave().getMemberID(), 10);
+ currentView.getPreferredCoordinators(filter, services.getJoinLeave().getMemberID(),
+ ServiceConfig.SMALL_CLUSTER_SIZE + 1);
} else {
recipients = currentView.getMembers();
}
diff --git a/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/gms/membership/GMSJoinLeave.java b/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/gms/membership/GMSJoinLeave.java
index 45b17bf..7b257ef 100644
--- a/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/gms/membership/GMSJoinLeave.java
+++ b/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/gms/membership/GMSJoinLeave.java
@@ -65,6 +65,7 @@ import org.apache.geode.distributed.internal.membership.NetMember;
import org.apache.geode.distributed.internal.membership.NetView;
import org.apache.geode.distributed.internal.membership.gms.GMSMember;
import org.apache.geode.distributed.internal.membership.gms.GMSUtil;
+import org.apache.geode.distributed.internal.membership.gms.ServiceConfig;
import org.apache.geode.distributed.internal.membership.gms.Services;
import org.apache.geode.distributed.internal.membership.gms.interfaces.JoinLeave;
import org.apache.geode.distributed.internal.membership.gms.interfaces.MessageHandler;
@@ -1660,7 +1661,8 @@ public class GMSJoinLeave implements JoinLeave, MessageHandler {
processRemoveRequest(msg);
if (!this.isCoordinator) {
msg.resetRecipients();
- msg.setRecipients(v.getPreferredCoordinators(Collections.emptySet(), localAddress, 10));
+ msg.setRecipients(v.getPreferredCoordinators(Collections.emptySet(), localAddress,
+ ServiceConfig.SMALL_CLUSTER_SIZE + 1));
services.getMessenger().send(msg);
}
} else {