You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@geode.apache.org by bs...@apache.org on 2015/09/03 01:42:48 UTC
incubator-geode git commit: GEODE-77 bug fixes for LocatorDUnitTest
Repository: incubator-geode
Updated Branches:
refs/heads/feature/GEODE-77 6bf1f2094 -> 017861064
GEODE-77 bug fixes for LocatorDUnitTest
All tests in LocatorDUnitTest now pass except the quorum check,
which needs a test hook. GMSJoinLeaveJUnitTest, GMSHealthMonitorJUnitTest
(except for testSuspectMembersCalledThroughSuspectThread) and
MembershipJUnitTest are all passing with these changes.
Project: http://git-wip-us.apache.org/repos/asf/incubator-geode/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-geode/commit/01786106
Tree: http://git-wip-us.apache.org/repos/asf/incubator-geode/tree/01786106
Diff: http://git-wip-us.apache.org/repos/asf/incubator-geode/diff/01786106
Branch: refs/heads/feature/GEODE-77
Commit: 0178610645178cc995ca60992d00368a266f1378
Parents: 6bf1f20
Author: Bruce Schuchardt <bs...@pivotal.io>
Authored: Wed Sep 2 16:41:26 2015 -0700
Committer: Bruce Schuchardt <bs...@pivotal.io>
Committed: Wed Sep 2 16:42:33 2015 -0700
----------------------------------------------------------------------
.../internal/DistributionManager.java | 2 -
.../internal/membership/NetMember.java | 2 +
.../internal/membership/NetView.java | 10 +-
.../internal/membership/gms/ServiceConfig.java | 2 +
.../internal/membership/gms/Services.java | 27 ++-
.../membership/gms/fd/GMSHealthMonitor.java | 75 +++++---
.../membership/gms/locator/GMSLocator.java | 8 +-
.../membership/gms/membership/GMSJoinLeave.java | 84 ++++++---
.../gms/messages/PingRequestMessage.java | 12 +-
.../gms/messages/PingResponseMessage.java | 2 +-
.../gms/messenger/AddressManager.java | 4 +-
.../gms/messenger/JGroupsMessenger.java | 11 +-
.../gms/mgr/GMSMembershipManager.java | 13 +-
.../gemfire/distributed/LocatorDUnitTest.java | 178 ++++++++++---------
.../membership/GMSHealthMonitorJUnitTest.java | 5 +-
.../test/java/dunit/DistributedTestCase.java | 4 +-
16 files changed, 272 insertions(+), 167 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/01786106/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/DistributionManager.java
----------------------------------------------------------------------
diff --git a/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/DistributionManager.java b/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/DistributionManager.java
index e8fe361..a6a425b 100644
--- a/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/DistributionManager.java
+++ b/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/DistributionManager.java
@@ -629,8 +629,6 @@ public class DistributionManager
logger.info(LogMarker.DM, LocalizedMessage.create(
LocalizedStrings.DistributionManager_DISTRIBUTIONMANAGER_0_STARTED_ON_1_THERE_WERE_2_OTHER_DMS_3_4_5, logArgs));
-logger.info("My ID is {}", Integer.toHexString(System.identityHashCode(dm.getDistributionManagerId())));
-
MembershipLogger.logStartup(dm.getDistributionManagerId());
}
return dm;
http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/01786106/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/NetMember.java
----------------------------------------------------------------------
diff --git a/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/NetMember.java b/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/NetMember.java
index 8966e0f..a110ff2 100755
--- a/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/NetMember.java
+++ b/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/NetMember.java
@@ -42,6 +42,8 @@ public interface NetMember
*/
public boolean splitBrainEnabled();
+ public void setSplitBrainEnabled(boolean enabled);
+
/**
* return a flag stating whether the member can be the membership coordinator
* @since 5.6
http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/01786106/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/NetView.java
----------------------------------------------------------------------
diff --git a/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/NetView.java b/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/NetView.java
index 46ac284..3ebae8f 100644
--- a/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/NetView.java
+++ b/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/NetView.java
@@ -205,16 +205,17 @@ public class NetView implements DataSerializableFixedID {
}
/***
- * This functions returns the list of all preferred coordinators.
+ * This functions returns the list of preferred coordinators.
* One random member from list of non-preferred member list. It make
* sure that random member is not in suspected Set.
* And local member.
*
* @param filter Suspect member set.
* @param localAddress
+ * @param maxNumberDesired number of preferred coordinators to return
* @return list of preferred coordinators
*/
- public List<InternalDistributedMember> getAllPreferredCoordinators(Set<InternalDistributedMember> filter, InternalDistributedMember localAddress) {
+ public List<InternalDistributedMember> getPreferredCoordinators(Set<InternalDistributedMember> filter, InternalDistributedMember localAddress, int maxNumberDesired) {
List<InternalDistributedMember> results = new ArrayList<InternalDistributedMember>();
List<InternalDistributedMember> notPreferredCoordinatorList = new ArrayList<InternalDistributedMember>();
@@ -224,7 +225,10 @@ public class NetView implements DataSerializableFixedID {
continue;// this is must to add
}
if (addr.getNetMember().preferredForCoordinator()) {
- results.add(addr);// add all preferred coordinator
+ results.add(addr);
+ if (results.size() >= maxNumberDesired) {
+ break;
+ }
} else if (!filter.contains(addr)) {
notPreferredCoordinatorList.add(addr);
}
http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/01786106/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/ServiceConfig.java
----------------------------------------------------------------------
diff --git a/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/ServiceConfig.java b/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/ServiceConfig.java
index d6b4d47..c66e262 100755
--- a/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/ServiceConfig.java
+++ b/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/ServiceConfig.java
@@ -135,6 +135,8 @@ public class ServiceConfig {
memberWeight = Integer.getInteger("gemfire.member-weight", 0);
locatorWaitTime = theConfig.getLocatorWaitTime();
+
+ networkPartitionDetectionEnabled = theConfig.getEnableNetworkPartitionDetection();
}
}
http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/01786106/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/Services.java
----------------------------------------------------------------------
diff --git a/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/Services.java b/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/Services.java
index aa7bc1e..efc95ac 100755
--- a/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/Services.java
+++ b/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/Services.java
@@ -7,6 +7,7 @@ import org.apache.logging.log4j.Logger;
import com.gemstone.gemfire.CancelCriterion;
import com.gemstone.gemfire.CancelException;
import com.gemstone.gemfire.LogWriter;
+import com.gemstone.gemfire.distributed.DistributedSystemDisconnectedException;
import com.gemstone.gemfire.distributed.internal.DMStats;
import com.gemstone.gemfire.distributed.internal.DistributionConfig;
import com.gemstone.gemfire.distributed.internal.InternalLocator;
@@ -149,6 +150,18 @@ public class Services {
}
public void emergencyClose() {
+ logger.info("Membership: stopping services");
+ if (stopping) {
+ return;
+ }
+ stopping = true;
+ this.joinLeave.emergencyClose();
+ this.healthMon.emergencyClose();
+ this.auth.emergencyClose();
+ this.messenger.emergencyClose();
+ this.manager.emergencyClose();
+ this.timer.cancel();
+ this.cancelCriterion.cancel("Membership services are shut down");
}
public void stop() {
@@ -262,18 +275,14 @@ public class Services {
return null;
}
else {
- return new ServicesStoppedException(reasonForStopping, e);
+ if (e == null) {
+ return new DistributedSystemDisconnectedException(reasonForStopping);
+ } else {
+ return new DistributedSystemDisconnectedException(reasonForStopping, e);
+ }
}
}
}
- public static class ServicesStoppedException extends CancelException {
- private static final long serialVersionUID = 2134474966059876801L;
-
- public ServicesStoppedException(String message, Throwable cause) {
- super(message, cause);
- }
- }
-
}
http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/01786106/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/fd/GMSHealthMonitor.java
----------------------------------------------------------------------
diff --git a/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/fd/GMSHealthMonitor.java b/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/fd/GMSHealthMonitor.java
index babf9e3..84f537d 100755
--- a/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/fd/GMSHealthMonitor.java
+++ b/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/fd/GMSHealthMonitor.java
@@ -6,7 +6,6 @@ import static com.gemstone.gemfire.internal.DataSerializableFixedID.SUSPECT_MEMB
import java.util.ArrayList;
import java.util.Collection;
-import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
@@ -25,22 +24,17 @@ import java.util.concurrent.atomic.AtomicInteger;
import org.apache.logging.log4j.Logger;
import com.gemstone.gemfire.distributed.DistributedMember;
+import com.gemstone.gemfire.distributed.DistributedSystemDisconnectedException;
import com.gemstone.gemfire.distributed.internal.DistributionMessage;
import com.gemstone.gemfire.distributed.internal.membership.InternalDistributedMember;
import com.gemstone.gemfire.distributed.internal.membership.NetView;
import com.gemstone.gemfire.distributed.internal.membership.gms.Services;
import com.gemstone.gemfire.distributed.internal.membership.gms.interfaces.HealthMonitor;
import com.gemstone.gemfire.distributed.internal.membership.gms.interfaces.MessageHandler;
-import com.gemstone.gemfire.distributed.internal.membership.gms.messages.InstallViewMessage;
-import com.gemstone.gemfire.distributed.internal.membership.gms.messages.JoinRequestMessage;
-import com.gemstone.gemfire.distributed.internal.membership.gms.messages.JoinResponseMessage;
-import com.gemstone.gemfire.distributed.internal.membership.gms.messages.LeaveRequestMessage;
import com.gemstone.gemfire.distributed.internal.membership.gms.messages.PingRequestMessage;
import com.gemstone.gemfire.distributed.internal.membership.gms.messages.PingResponseMessage;
-import com.gemstone.gemfire.distributed.internal.membership.gms.messages.RemoveMemberMessage;
import com.gemstone.gemfire.distributed.internal.membership.gms.messages.SuspectMembersMessage;
import com.gemstone.gemfire.distributed.internal.membership.gms.messages.SuspectRequest;
-import com.gemstone.gemfire.distributed.internal.membership.gms.messages.ViewAckMessage;
/**
* Failure Detection
@@ -173,10 +167,20 @@ public class GMSHealthMonitor implements HealthMonitor, MessageHandler {
GMSHealthMonitor.this.currentTimeStamp = currentTime;
if (neighbour != null) {
- CustomTimeStamp nextNeigbourTS = GMSHealthMonitor.this.memberVsLastMsgTS.get(neighbour);
+ CustomTimeStamp nextNeighbourTS;
+ synchronized(GMSHealthMonitor.this) {
+ nextNeighbourTS = GMSHealthMonitor.this.memberVsLastMsgTS.get(neighbour);
+ }
+ if (nextNeighbourTS == null) {
+ CustomTimeStamp customTS = new CustomTimeStamp();
+ customTS.setTimeStamp(System.currentTimeMillis());
+ memberVsLastMsgTS.put(neighbour, customTS);
+ return;
+ }
+
long interval = memberTimeoutInMillis / GMSHealthMonitor.LOGICAL_INTERVAL;
- long lastTS = currentTime - nextNeigbourTS.getTimeStamp();
+ long lastTS = currentTime - nextNeighbourTS.getTimeStamp();
if (lastTS + interval >= memberTimeoutInMillis) {
logger.debug("Checking member {} ", neighbour);
// now do check request for this member;
@@ -206,7 +210,7 @@ public class GMSHealthMonitor implements HealthMonitor, MessageHandler {
private PingRequestMessage constructPingRequestMessage(final InternalDistributedMember pingMember) {
final int reqId = requestId.getAndIncrement();
- final PingRequestMessage prm = new PingRequestMessage(reqId);
+ final PingRequestMessage prm = new PingRequestMessage(pingMember, reqId);
prm.setRecipient(pingMember);
return prm;
@@ -228,7 +232,7 @@ public class GMSHealthMonitor implements HealthMonitor, MessageHandler {
String reason = String.format("Member isn't responding to check message: %s", pingMember);
GMSHealthMonitor.this.sendSuspectMessage(pingMember, reason);
} else {
- logger.debug("Setting next neighbour as member {} not responded.", pingMember);
+ logger.debug("Setting next neighbour as member {} has not responded.", pingMember);
// back to previous one
setNextNeighbour(GMSHealthMonitor.this.currentView, null);
}
@@ -238,7 +242,7 @@ public class GMSHealthMonitor implements HealthMonitor, MessageHandler {
}
private void sendSuspectMessage(InternalDistributedMember mbr, String reason) {
- logger.debug(reason);
+ logger.debug("Suspecting {} reason=\"{}\"", mbr, reason);
SuspectRequest sr = new SuspectRequest(mbr, reason);
List<SuspectRequest> sl = new ArrayList<SuspectRequest>();
sl.add(sr);
@@ -253,7 +257,7 @@ public class GMSHealthMonitor implements HealthMonitor, MessageHandler {
*/
private boolean doCheckMember(InternalDistributedMember pingMember) {
//TODO: need to some tcp check
- logger.debug("Checking the member: {}", pingMember);
+ logger.debug("Checking member {}", pingMember);
final PingRequestMessage prm = constructPingRequestMessage(pingMember);
final Response pingResp = new Response();
requestIdVsResponse.put(prm.getRequestId(), pingResp);
@@ -294,7 +298,7 @@ public class GMSHealthMonitor implements HealthMonitor, MessageHandler {
synchronized (suspectRequests) {
SuspectRequest sr = new SuspectRequest((InternalDistributedMember) mbr, reason);
if (!suspectRequests.contains(sr)) {
- logger.debug("Adding member {} to suspect for reason {}.", mbr, reason);
+ logger.info("Suspecting member {}. Reason= {}.", mbr, reason);
suspectRequests.add(sr);
suspectRequests.notify();
}
@@ -362,8 +366,8 @@ public class GMSHealthMonitor implements HealthMonitor, MessageHandler {
synchronized (viewVsSuspectedMembers) {
viewVsSuspectedMembers.clear();
}
+ setNextNeighbour(newView, null);
currentView = newView;
- setNextNeighbour(currentView, null);
}
/***
@@ -381,11 +385,13 @@ public class GMSHealthMonitor implements HealthMonitor, MessageHandler {
}
boolean sameView = false;
- if (newView.equals(currentView)) {
+ if (currentView != null &&
+ newView.getCreator().equals(currentView.getCreator()) &&
+ newView.getViewId() == currentView.getViewId()) {
sameView = true;
}
- List<InternalDistributedMember> allMembers = currentView.getMembers();
+ List<InternalDistributedMember> allMembers = newView.getMembers();
int index = allMembers.indexOf(nextTo);
if (index != -1) {
int nextNeighbourIndex = (index + 1) % allMembers.size();
@@ -394,15 +400,16 @@ public class GMSHealthMonitor implements HealthMonitor, MessageHandler {
}
if (!sameView || memberVsLastMsgTS.size() == 0) {
+
if (memberVsLastMsgTS.size() > 0) {
memberVsLastMsgTS.clear();
}
long cts = System.currentTimeMillis();
- for (int i = 0; i < allMembers.size(); i++) {
+ for (InternalDistributedMember mbr: allMembers) {
CustomTimeStamp customTS = new CustomTimeStamp();
customTS.setTimeStamp(cts);
- memberVsLastMsgTS.put(allMembers.get(i), customTS);
+ memberVsLastMsgTS.put(mbr, customTS);
}
}
}
@@ -510,12 +517,18 @@ public class GMSHealthMonitor implements HealthMonitor, MessageHandler {
}
private void processPingRequest(PingRequestMessage m) {
- PingResponseMessage prm = new PingResponseMessage(m.getRequestId());
- prm.setRecipient(m.getSender());
- Set<InternalDistributedMember> membersNotReceivedMsg = services.getMessenger().send(prm);
- // TODO: send is throwing exception right now
- if (membersNotReceivedMsg != null && membersNotReceivedMsg.contains(m.getSender())) {
- logger.debug("Unable to send check response to member: {}", m.getSender());
+ // only respond if the intended recipient is this member
+ InternalDistributedMember me = services.getMessenger().getMemberID();
+ if (me.getVmViewId() < 0 || m.getTarget().equals(me)) {
+ PingResponseMessage prm = new PingResponseMessage(m.getRequestId());
+ prm.setRecipient(m.getSender());
+ Set<InternalDistributedMember> membersNotReceivedMsg = services.getMessenger().send(prm);
+ // TODO: send is throwing exception right now
+ if (membersNotReceivedMsg != null && membersNotReceivedMsg.contains(m.getSender())) {
+ logger.debug("Unable to send check response to member: {}", m.getSender());
+ }
+ } else {
+ logger.debug("Ignoring ping request intended for {}. My ID is {}", m.getTarget(), me);
}
}
@@ -540,7 +553,6 @@ public class GMSHealthMonitor implements HealthMonitor, MessageHandler {
*/
private void processSuspectMembersRequest(SuspectMembersMessage incomingRequest) {
NetView cv = currentView;
- logger.debug("GMSHealthMonitor.processSuspectMembersRequest invoked for members {}", incomingRequest);
if (cv == null) {
return;
@@ -638,6 +650,8 @@ public class GMSHealthMonitor implements HealthMonitor, MessageHandler {
if (!pinged) {
GMSHealthMonitor.this.services.getJoinLeave().remove(mbr, reason);
}
+ } catch (DistributedSystemDisconnectedException e) {
+ return;
} catch (Exception e) {
logger.info("Unexpected exception while verifying member", e);
} finally {
@@ -759,10 +773,15 @@ public class GMSHealthMonitor implements HealthMonitor, MessageHandler {
for (int i = 0; i < requests.size(); i++) {
filter.add(requests.get(i).getSuspectMember());
}
- List<InternalDistributedMember> recipients = currentView.getAllPreferredCoordinators(filter, services.getJoinLeave().getMemberID());
+ List<InternalDistributedMember> recipients = currentView.getPreferredCoordinators(filter, services.getJoinLeave().getMemberID(), 5);
SuspectMembersMessage rmm = new SuspectMembersMessage(recipients, requests);
- Set<InternalDistributedMember> failedRecipients = services.getMessenger().send(rmm);
+ Set<InternalDistributedMember> failedRecipients;
+ try {
+ failedRecipients = services.getMessenger().send(rmm);
+ } catch (DistributedSystemDisconnectedException e) {
+ return;
+ }
if (failedRecipients != null && failedRecipients.size() > 0) {
logger.info("Unable to send suspect message to {}", recipients);
http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/01786106/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/locator/GMSLocator.java
----------------------------------------------------------------------
diff --git a/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/locator/GMSLocator.java b/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/locator/GMSLocator.java
index d9cfb36..66aa0a9 100755
--- a/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/locator/GMSLocator.java
+++ b/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/locator/GMSLocator.java
@@ -127,8 +127,8 @@ public class GMSLocator implements Locator, NetLocator {
public Object processRequest(Object request) throws IOException {
Object response = null;
- if (logger.isInfoEnabled()) {
- logger.info("Peer locator processing {}", request);
+ if (logger.isDebugEnabled()) {
+ logger.debug("Peer locator processing {}", request);
}
if (request instanceof GetViewRequest) {
@@ -151,7 +151,7 @@ public class GMSLocator implements Locator, NetLocator {
if (view != null) {
coord = view.getCoordinator(findRequest.getRejectedCoordinators());
- logger.info("Peer locator: coordinator from view is {}", coord);
+ logger.debug("Peer locator: coordinator from view is {}", coord);
fromView = true;
}
@@ -180,7 +180,7 @@ public class GMSLocator implements Locator, NetLocator {
}
}
}
- logger.info("Peer locator: coordinator from registrations is {}", coord);
+ logger.debug("Peer locator: coordinator from registrations is {}", coord);
}
}
response = new FindCoordinatorResponse(coord, fromView,
http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/01786106/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/membership/GMSJoinLeave.java
----------------------------------------------------------------------
diff --git a/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/membership/GMSJoinLeave.java b/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/membership/GMSJoinLeave.java
index 9414dfa..d852e1b 100755
--- a/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/membership/GMSJoinLeave.java
+++ b/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/membership/GMSJoinLeave.java
@@ -34,6 +34,7 @@ import org.apache.logging.log4j.Logger;
import com.gemstone.gemfire.GemFireConfigException;
import com.gemstone.gemfire.SystemConnectException;
import com.gemstone.gemfire.distributed.DistributedMember;
+import com.gemstone.gemfire.distributed.DistributedSystemDisconnectedException;
import com.gemstone.gemfire.distributed.internal.DistributionConfig;
import com.gemstone.gemfire.distributed.internal.DistributionManager;
import com.gemstone.gemfire.distributed.internal.DistributionMessage;
@@ -238,6 +239,8 @@ public class GMSJoinLeave implements JoinLeave, MessageHandler {
me.setBirthViewId(birthViewId);
me.setSplitBrainEnabled(o.isSplitBrainEnabled());
me.setPreferredForCoordinator(o.preferredForCoordinator());
+ services.getConfig().getDistributionConfig().setEnableNetworkPartitionDetection(o.isSplitBrainEnabled());
+ services.getConfig().setNetworkPartitionDetectionEnabled(o.isSplitBrainEnabled());
installView(response.getCurrentView());
return true;
} else {
@@ -346,10 +349,7 @@ public class GMSJoinLeave implements JoinLeave, MessageHandler {
*/
private void processRemoveRequest(RemoveMemberMessage incomingRequest) {
NetView v = currentView;
- if (logger.isDebugEnabled()) {
- logger.debug("JoinLeave.processRemoveRequest invoked. isCoordinator="+isCoordinator+ "; isStopping="+isStopping
- +"; cancelInProgress="+services.getCancelCriterion().isCancelInProgress());
- }
+
InternalDistributedMember mbr = incomingRequest.getMemberID();
if (v != null && !v.contains(incomingRequest.getSender())) {
@@ -358,7 +358,8 @@ public class GMSJoinLeave implements JoinLeave, MessageHandler {
}
logger.info("Membership received a request to remove " + mbr
- + "; reason="+incomingRequest.getReason());
+ + " from " + incomingRequest.getSender()
+ + " reason="+incomingRequest.getReason());
if (mbr.equals(this.localAddress)) {
// oops - I've been kicked out
@@ -481,12 +482,16 @@ public class GMSJoinLeave implements JoinLeave, MessageHandler {
boolean sendView(NetView view, Collection<InternalDistributedMember> newMembers, boolean preparing, ViewReplyProcessor rp) {
int id = view.getViewId();
InstallViewMessage msg = new InstallViewMessage(view, services.getAuthenticator().getCredentials(this.localAddress), preparing);
- Set<InternalDistributedMember> recips = new HashSet<InternalDistributedMember>(view.getMembers());
+ Set<InternalDistributedMember> recips = new HashSet<>(view.getMembers());
recips.removeAll(newMembers); // new members get the view in a JoinResponseMessage
recips.remove(this.localAddress); // no need to send it to ourselves
- recips.addAll(view.getCrashedMembers());
+ Set<InternalDistributedMember> responders = recips;
+ if (!view.getCrashedMembers().isEmpty()) {
+ recips = new HashSet<>(recips);
+ recips.addAll(view.getCrashedMembers());
+ }
logger.info((preparing? "preparing" : "sending") + " new view " + view);
@@ -501,7 +506,7 @@ public class GMSJoinLeave implements JoinLeave, MessageHandler {
}
msg.setRecipients(recips);
- rp.initialize(id, recips);
+ rp.initialize(id, responders);
services.getMessenger().send(msg);
// only wait for responses during preparation
@@ -879,13 +884,20 @@ public class GMSJoinLeave implements JoinLeave, MessageHandler {
public void remove(InternalDistributedMember m, String reason) {
NetView v = this.currentView;
- if (v != null) {
+ services.getCancelCriterion().checkCancelInProgress(null);
+
+ if (v != null && v.contains(m)) {
Set<InternalDistributedMember> filter = new HashSet<>();
filter.add(m);
- RemoveMemberMessage msg = new RemoveMemberMessage(v.getAllPreferredCoordinators(filter, getMemberID()),
+ RemoveMemberMessage msg = new RemoveMemberMessage(v.getPreferredCoordinators(filter, getMemberID(), 5),
m,
reason);
- services.getMessenger().send(msg);
+ if (this.isCoordinator) {
+ msg.setSender(this.localAddress);
+ processRemoveRequest(msg);
+ } else {
+ services.getMessenger().send(msg);
+ }
}
}
@@ -1024,6 +1036,9 @@ public class GMSJoinLeave implements JoinLeave, MessageHandler {
&& (services.getCancelCriterion().cancelInProgress() == null)) {
try {
synchronized(result) {
+ if (result.isEmpty() || this.conflictingView != null) {
+ break;
+ }
result.wait(1000);
}
} catch (InterruptedException e) {
@@ -1123,7 +1138,11 @@ public class GMSJoinLeave implements JoinLeave, MessageHandler {
} // synchronized
if (requests != null && !requests.isEmpty()) {
logger.debug("View Creator is processing {} requests for the next membership view", requests.size());
- /*boolean success = */createAndSendView(requests);
+ try {
+ createAndSendView(requests);
+ } catch (DistributedSystemDisconnectedException e) {
+ shutdown = true;
+ }
requests = null;
}
}
@@ -1136,7 +1155,7 @@ public class GMSJoinLeave implements JoinLeave, MessageHandler {
* Create a new membership view and send it to members (including crashed members).
* Returns false if the view cannot be prepared successfully, true otherwise
*/
- boolean createAndSendView(List<DistributionMessage> requests) {
+ void createAndSendView(List<DistributionMessage> requests) {
List<InternalDistributedMember> joinReqs = new ArrayList<InternalDistributedMember>();
List<InternalDistributedMember> leaveReqs = new ArrayList<InternalDistributedMember>();
List<InternalDistributedMember> removalReqs = new ArrayList<InternalDistributedMember>();
@@ -1145,7 +1164,7 @@ public class GMSJoinLeave implements JoinLeave, MessageHandler {
NetView oldView = currentView;
List<InternalDistributedMember> oldMembers;
if (oldView != null) {
- oldMembers = oldView.getMembers();
+ oldMembers = new ArrayList<>(oldView.getMembers());
} else {
oldMembers = Collections.emptyList();
}
@@ -1157,7 +1176,17 @@ public class GMSJoinLeave implements JoinLeave, MessageHandler {
if (msg instanceof JoinRequestMessage) {
mbr = ((JoinRequestMessage)msg).getMemberID();
- if (!oldMembers.contains(mbr) && !joinReqs.contains(mbr)) {
+
+ boolean duplicate = false;
+ for (InternalDistributedMember m: oldMembers) {
+ // check the netMembers, which wildcards the
+ // viewID to detect old IDs still in the view
+ if (mbr.getNetMember().equals(m.getNetMember())) {
+ duplicate = true;
+ break;
+ }
+ }
+ if (!duplicate && !joinReqs.contains(mbr)) {
joinReqs.add(mbr);
}
}
@@ -1179,6 +1208,14 @@ public class GMSJoinLeave implements JoinLeave, MessageHandler {
}
}
+
+
+ if (removalReqs.isEmpty() && leaveReqs.isEmpty() && joinReqs.isEmpty()) {
+ return;
+ }
+
+
+
NetView newView;
synchronized(viewInstallationLock) {
int viewNumber = 0;
@@ -1187,7 +1224,7 @@ public class GMSJoinLeave implements JoinLeave, MessageHandler {
mbrs = new ArrayList<InternalDistributedMember>(joinReqs.size());
} else {
viewNumber = currentView.getViewId()+1;
- mbrs = new ArrayList<InternalDistributedMember>(currentView.getMembers());
+ mbrs = new ArrayList<InternalDistributedMember>(oldMembers);
}
mbrs.addAll(joinReqs);
mbrs.removeAll(leaveReqs);
@@ -1198,6 +1235,7 @@ public class GMSJoinLeave implements JoinLeave, MessageHandler {
for (InternalDistributedMember mbr: joinReqs) {
mbr.setVmViewId(newView.getViewId());
+ mbr.getNetMember().setSplitBrainEnabled(services.getConfig().isNetworkPartitionDetectionEnabled());
}
// send removal messages before installing the view so we stop
// getting messages from members that have been kicked out
@@ -1207,7 +1245,7 @@ public class GMSJoinLeave implements JoinLeave, MessageHandler {
// the new view
if (newView.getMembers().equals(currentView.getMembers())) {
logger.info("membership hasn't changed - aborting new view {}", newView);
- return true;
+ return;
}
// we want to always check for quorum loss but don't act on it
@@ -1220,7 +1258,7 @@ public class GMSJoinLeave implements JoinLeave, MessageHandler {
boolean prepared = false;
do {
if (this.shutdown || Thread.currentThread().isInterrupted()) {
- return false;
+ return;
}
prepared = prepareView(newView, joinReqs);
if (!prepared && quorumRequired) {
@@ -1230,11 +1268,11 @@ public class GMSJoinLeave implements JoinLeave, MessageHandler {
} catch (InterruptedException e) {
// abort the view if interrupted
shutdown = true;
- return false;
+ return;
}
if (!unresponsive.isEmpty()) {
- List<InternalDistributedMember> failures = new ArrayList<InternalDistributedMember>(currentView.getCrashedMembers().size() + unresponsive.size());
+ List<InternalDistributedMember> failures = new ArrayList<>(currentView.getCrashedMembers().size() + unresponsive.size());
failures.addAll(unresponsive);
NetView conflictingView = prepareProcessor.getConflictingView();
@@ -1250,7 +1288,9 @@ public class GMSJoinLeave implements JoinLeave, MessageHandler {
if (failures.size() > 0) {
// abort the current view and try again
removalReqs.addAll(failures);
- newView = new NetView(localAddress, newView.getViewId()+1, newView.getMembers(), leaveReqs,
+ List<InternalDistributedMember> newMembers = new ArrayList<>(newView.getMembers());
+ newMembers.removeAll(removalReqs);
+ newView = new NetView(localAddress, newView.getViewId()+1, newMembers, leaveReqs,
removalReqs);
}
}
@@ -1261,7 +1301,7 @@ public class GMSJoinLeave implements JoinLeave, MessageHandler {
lastConflictingView = null;
sendView(newView, joinReqs);
- return true;
+ return;
}
/**
http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/01786106/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/messages/PingRequestMessage.java
----------------------------------------------------------------------
diff --git a/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/messages/PingRequestMessage.java b/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/messages/PingRequestMessage.java
index 95c86b6..a38c586 100755
--- a/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/messages/PingRequestMessage.java
+++ b/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/messages/PingRequestMessage.java
@@ -13,13 +13,19 @@ import com.gemstone.gemfire.internal.Version;
public class PingRequestMessage extends HighPriorityDistributionMessage{
int requestId;
+ InternalDistributedMember target;
- public PingRequestMessage(int id) {
+ public PingRequestMessage(InternalDistributedMember neighbour, int id) {
requestId = id;
+ this.target = neighbour;
}
public PingRequestMessage(){}
+ public InternalDistributedMember getTarget() {
+ return target;
+ }
+
@Override
public int getDSFID() {
return PING_REQUEST;
@@ -32,7 +38,7 @@ public class PingRequestMessage extends HighPriorityDistributionMessage{
@Override
public String toString() {
- return "PingRequestMessage [requestId=" + requestId + "]";
+ return "PingRequestMessage [requestId=" + requestId + "] from " + getSender();
}
public int getRequestId() {
@@ -47,10 +53,12 @@ public class PingRequestMessage extends HighPriorityDistributionMessage{
@Override
public void toData(DataOutput out) throws IOException {
out.writeInt(requestId);
+ DataSerializer.writeObject(target, out);
}
@Override
public void fromData(DataInput in) throws IOException, ClassNotFoundException {
requestId = in.readInt();
+ target = DataSerializer.readObject(in);
}
}
http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/01786106/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/messages/PingResponseMessage.java
----------------------------------------------------------------------
diff --git a/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/messages/PingResponseMessage.java b/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/messages/PingResponseMessage.java
index 86e154c..ef69e25 100755
--- a/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/messages/PingResponseMessage.java
+++ b/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/messages/PingResponseMessage.java
@@ -34,7 +34,7 @@ public class PingResponseMessage extends HighPriorityDistributionMessage {
@Override
public String toString() {
- return "PingResponseMessage [requestId=" + requestId + "]";
+ return "PingResponseMessage [requestId=" + requestId + "] from " + getSender();
}
@Override
http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/01786106/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/messenger/AddressManager.java
----------------------------------------------------------------------
diff --git a/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/messenger/AddressManager.java b/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/messenger/AddressManager.java
index bfd0aee..a1305d5 100755
--- a/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/messenger/AddressManager.java
+++ b/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/messenger/AddressManager.java
@@ -45,13 +45,13 @@ public class AddressManager extends Protocol {
case Event.FIND_MBRS:
List<Address> missing = (List<Address>)evt.getArg();
- logger.debug("AddressManager.FIND_MBRS processing {}", missing);
+// logger.debug("AddressManager.FIND_MBRS processing {}", missing);
Responses responses = new Responses(false);
for (Address laddr: missing) {
try {
if (laddr instanceof JGAddress) {
PingData pd = new PingData(laddr, true, laddr.toString(), newIpAddress(laddr));
- logger.debug("AddressManager.FIND_MBRS adding response {}", pd);
+// logger.debug("AddressManager.FIND_MBRS adding response {}", pd);
responses.addResponse(pd, false);
updateUDPCache(pd);
}
http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/01786106/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/messenger/JGroupsMessenger.java
----------------------------------------------------------------------
diff --git a/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/messenger/JGroupsMessenger.java b/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/messenger/JGroupsMessenger.java
index d7f2723..2004598 100755
--- a/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/messenger/JGroupsMessenger.java
+++ b/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/messenger/JGroupsMessenger.java
@@ -403,13 +403,14 @@ public class JGroupsMessenger implements Messenger {
DMStats theStats = services.getStatistics();
if (!myChannel.isConnected()) {
+ logger.info("JGroupsMessenger channel is closed - messaging is not possible");
throw new DistributedSystemDisconnectedException("Distributed System is shutting down");
}
filterOutgoingMessage(msg);
- if (logger.isDebugEnabled()) {
- logger.debug("JGroupsMessenger sending [{}] recipients: {}", msg, msg.getRecipientsDescription());
+ if (logger.isTraceEnabled()) {
+ logger.trace("JGroupsMessenger sending [{}] recipients: {}", msg, msg.getRecipientsDescription());
}
InternalDistributedMember[] destinations = msg.getRecipients();
@@ -660,7 +661,7 @@ public class JGroupsMessenger implements Messenger {
services.getStatistics().endMsgDeserialization(start);
- logger.debug("JGroupsReceiver deserialized {}", result);
+ logger.trace("JGroupsReceiver deserialized {}", result);
}
catch (ClassNotFoundException | IOException | RuntimeException e) {
@@ -769,8 +770,8 @@ public class JGroupsMessenger implements Messenger {
return;
}
- if (logger.isDebugEnabled()) {
- logger.debug("JGroupsMessenger received {} headers: {}", jgmsg, jgmsg.getHeaders());
+ if (logger.isTraceEnabled()) {
+ logger.trace("JGroupsMessenger received {} headers: {}", jgmsg, jgmsg.getHeaders());
}
Object o = readJGMessage(jgmsg);
http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/01786106/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/mgr/GMSMembershipManager.java
----------------------------------------------------------------------
diff --git a/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/mgr/GMSMembershipManager.java b/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/mgr/GMSMembershipManager.java
index 9c4d061..1cc079e 100755
--- a/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/mgr/GMSMembershipManager.java
+++ b/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/mgr/GMSMembershipManager.java
@@ -1685,12 +1685,6 @@ public class GMSMembershipManager implements MembershipManager, Manager
directChannel.emergencyClose();
}
- services.getJoinLeave().emergencyClose();
- services.getMessenger().emergencyClose();
- services.getHealthMonitor().emergencyClose();
- services.getAuthenticator().emergencyClose();
-
-
// could we guarantee not to allocate objects? We're using Darrel's
// factory, so it's possible that an unsafe implementation could be
// introduced here.
@@ -1765,6 +1759,10 @@ public class GMSMembershipManager implements MembershipManager, Manager
public void uncleanShutdown(String reason, final Exception e) {
inhibitForcedDisconnectLogging(false);
+ if (this.shutdownCause == null) {
+ this.shutdownCause = e;
+ }
+
if (this.directChannel != null) {
this.directChannel.disconnect(e);
}
@@ -2916,6 +2914,9 @@ public class GMSMembershipManager implements MembershipManager, Manager
logger.fatal(LocalizedMessage.create(
LocalizedStrings.GroupMembershipService_MEMBERSHIP_SERVICE_FAILURE_0, reason), shutdownCause);
}
+
+ services.emergencyClose();
+
// stop server locators immediately since they may not have correct
// information. This has caused client failures in bridge/wan
// network-down testing
http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/01786106/gemfire-core/src/test/java/com/gemstone/gemfire/distributed/LocatorDUnitTest.java
----------------------------------------------------------------------
diff --git a/gemfire-core/src/test/java/com/gemstone/gemfire/distributed/LocatorDUnitTest.java b/gemfire-core/src/test/java/com/gemstone/gemfire/distributed/LocatorDUnitTest.java
index 7f1a922..0145efb 100644
--- a/gemfire-core/src/test/java/com/gemstone/gemfire/distributed/LocatorDUnitTest.java
+++ b/gemfire-core/src/test/java/com/gemstone/gemfire/distributed/LocatorDUnitTest.java
@@ -33,6 +33,7 @@ import com.gemstone.gemfire.distributed.internal.membership.MembershipManager;
import com.gemstone.gemfire.distributed.internal.membership.MembershipTestHook;
import com.gemstone.gemfire.distributed.internal.membership.NetView;
import com.gemstone.gemfire.distributed.internal.membership.gms.MembershipManagerHelper;
+import com.gemstone.gemfire.internal.Assert;
import com.gemstone.gemfire.internal.AvailablePort;
import com.gemstone.gemfire.internal.AvailablePortHelper;
import com.gemstone.gemfire.internal.logging.InternalLogWriter;
@@ -54,6 +55,8 @@ import dunit.VM;
*/
public class LocatorDUnitTest extends DistributedTestCase {
+ static TestHook hook;
+
/**
* Creates a new <code>LocatorDUnitTest</code>
*/
@@ -225,8 +228,7 @@ public class LocatorDUnitTest extends DistributedTestCase {
* Bug 30341 concerns race conditions in JGroups that allow two locators to start up in a
* split-brain configuration. To work around this we have always told customers that they
* need to stagger the starting of locators. This test configures two locators to start up
- * simultaneously and shows that they will refuse to boot if they do not find one another
- * and form a single distributed system.
+ * simultaneously and shows that they find each other and form a single system.
*
* @throws Exception
*/
@@ -257,39 +259,31 @@ public class LocatorDUnitTest extends DistributedTestCase {
SerializableCallable startLocator1 = new SerializableCallable("start locator1") {
@Override
public Object call() throws Exception {
-// boolean setting = GemFireTracer.DEBUG;
try {
System.setProperty("p2p.joinTimeout", "5000"); // set a short join timeout. default is 17000ms
- fail("ignore required response must be implemented for the jgroups replacement");
-// PingWaiter.TEST_HOOK_IGNORE_REQUIRED_RESPONSE = true;
- Locator myLocator = Locator.startLocatorAndDS(port1, new File("testBug30341Locator1.log"), properties);
- myLocator.stop();
+ Locator myLocator = Locator.startLocatorAndDS(port1, new File(""), properties);
} catch (SystemConnectException e) {
- return Boolean.TRUE;
+ return Boolean.FALSE;
} catch (GemFireConfigException e) {
- return Boolean.TRUE;
+ return Boolean.FALSE;
} finally {
-// PingWaiter.TEST_HOOK_IGNORE_REQUIRED_RESPONSE = false;
System.getProperties().remove("p2p.joinTimeout");
}
- return Boolean.FALSE;
+ return Boolean.TRUE;
}
};
SerializableCallable startLocator2 = new SerializableCallable("start locator2") {
@Override
public Object call() throws Exception {
-// boolean setting = GemFireTracer.DEBUG;
try {
System.setProperty("p2p.joinTimeout", "5000"); // set a short join timeout. default is 17000ms
-// GemFireTracer.DEBUG = true;
- Locator myLocator = Locator.startLocatorAndDS(port2, new File("testBug30341Locator2.log"), properties);
+ Locator myLocator = Locator.startLocatorAndDS(port2, new File(""), properties);
} catch (SystemConnectException e) {
- return Boolean.TRUE;
+ return Boolean.FALSE;
} finally {
System.getProperties().remove("p2p.joinTimeout");
-// GemFireTracer.DEBUG = setting;
}
- return Boolean.FALSE;
+ return Boolean.TRUE;
}
};
AsyncInvocation async1 = null;
@@ -315,23 +309,34 @@ public class LocatorDUnitTest extends DistributedTestCase {
if (result2 instanceof Exception) {
throw (Exception)result2;
}
- if (!(Boolean)result1) {
- fail("locator1 started but shouldn't have");
- }
- // note: locator2 will sometimes fail to start as well because it
- // is still able to talk to locator1's gossip server during its
- // last attempt to start up
+ // verify that they found each other
+ SerializableCallable verify = new SerializableCallable("verify no split-brain") {
+ public Object call() {
+ InternalDistributedSystem sys = InternalDistributedSystem.getAnyInstance();
+ if (sys == null) {
+ fail("no distributed system found");
+ }
+ Assert.assertTrue(sys.getDM().getViewMembers().size() == 2,
+ "expected 2 members but found " + sys.getDM().getViewMembers().size()
+ );
+ return true;
+ }
+ };
+ loc2.invoke(verify);
+ loc1.invoke(verify);
}
}
} finally {
- loc2.invoke(new SerializableRunnable("stop locator") {
+ SerializableRunnable r = new SerializableRunnable("stop locator") {
public void run() {
Locator loc = Locator.getLocator();
if (loc != null) {
loc.stop();
}
}
- });
+ };
+ loc2.invoke(r);
+ loc1.invoke(r);
}
}
@@ -355,7 +360,7 @@ public class LocatorDUnitTest extends DistributedTestCase {
properties.put("enable-network-partition-detection", "true");
properties.put("disable-auto-reconnect", "true");
- File logFile = new File(getUniqueName() + "-locator.log");
+ File logFile = new File("");
if (logFile.exists()) {
logFile.delete();
}
@@ -379,7 +384,6 @@ public class LocatorDUnitTest extends DistributedTestCase {
// connect three vms and then watch the lead member selection as they
// are disconnected/reconnected
- properties.put("log-file", "testLeadMemberSelection_vm1.log");
properties.put("name", "vm1");
DistributedMember mem1 = (DistributedMember)vm1.invoke(this.getClass(),
"getDistributedMember", connectArgs);
@@ -387,13 +391,11 @@ public class LocatorDUnitTest extends DistributedTestCase {
// assertTrue(MembershipManagerHelper.getLeadMember(sys) != null);
assertLeadMember(mem1, sys, 5000);
- properties.put("log-file", "testLeadMemberSelection_vm2.log");
properties.put("name", "vm2");
DistributedMember mem2 = (DistributedMember)vm2.invoke(this.getClass(),
"getDistributedMember", connectArgs);
assertLeadMember(mem1, sys, 5000);
- properties.put("log-file", "testLeadMemberSelection_vm3.log");
properties.put("name", "vm3");
DistributedMember mem3 = (DistributedMember)vm3.invoke(this.getClass(),
"getDistributedMember", connectArgs);
@@ -404,7 +406,6 @@ public class LocatorDUnitTest extends DistributedTestCase {
MembershipManagerHelper.getMembershipManager(sys).waitForDeparture(mem1);
assertLeadMember(mem2, sys, 5000);
- properties.put("log-file", "testLeadMemberSelection_vm1.log");
properties.put("name", "vm1");
mem1 = (DistributedMember)vm1.invoke(this.getClass(),
"getDistributedMember", connectArgs);
@@ -484,18 +485,19 @@ public class LocatorDUnitTest extends DistributedTestCase {
properties.put("enable-network-partition-detection", "true");
properties.put("disable-auto-reconnect", "true");
properties.put("member-timeout", "2000");
+// properties.put("log-level", "fine");
properties.put(DistributionConfig.ENABLE_CLUSTER_CONFIGURATION_NAME, "false");
try {
final String uname = getUniqueName();
- File logFile = new File(uname + "-locator1-" + port1 + ".log");
+ File logFile = new File("");
locator = Locator.startLocatorAndDS(port1, logFile, properties);
final DistributedSystem sys = locator.getDistributedSystem();
sys.getLogWriter().info("<ExpectedException action=add>java.net.ConnectException</ExpectedException>");
MembershipManagerHelper.inhibitForcedDisconnectLogging(true);
locvm.invoke(new SerializableRunnable() {
public void run() {
- File lf = new File(uname + "-locator2-" + port2 + ".log");
+ File lf = new File("");
try {
Locator.startLocatorAndDS(port2, lf, properties);
}
@@ -618,12 +620,12 @@ public class LocatorDUnitTest extends DistributedTestCase {
try {
final String uname = getUniqueName();
- File logFile = new File(uname + "-locator1-" + port1 + ".log");
+ File logFile = new File("");
locator = Locator.startLocatorAndDS(port1, logFile, properties);
final DistributedSystem sys = locator.getDistributedSystem();
locvm3.invoke(new SerializableRunnable() {
public void run() {
- File lf = new File(uname + "-locator2-" + port2 + ".log");
+ File lf = new File("");
try {
Locator.startLocatorAndDS(port2, lf, properties);
}
@@ -768,12 +770,12 @@ public class LocatorDUnitTest extends DistributedTestCase {
try {
final String uname = getUniqueName();
- File logFile = new File(uname + "-locator1-" + port1 + ".log");
+ File logFile = new File("");
locator = Locator.startLocatorAndDS(port1, logFile, properties);
DistributedSystem sys = locator.getDistributedSystem();
locvm.invoke(new SerializableRunnable() {
public void run() {
- File lf = new File(uname + "-locator2-" + port2 + ".log");
+ File lf = new File("");
try {
Locator.startLocatorAndDS(port2, lf, properties);
MembershipManagerHelper.inhibitForcedDisconnectLogging(true);
@@ -909,18 +911,19 @@ public class LocatorDUnitTest extends DistributedTestCase {
properties.put("enable-network-partition-detection", "true");
properties.put("disable-auto-reconnect", "true");
properties.put("member-timeout", "2000");
+// properties.put("log-level", "fine");
properties.put(DistributionConfig.ENABLE_CLUSTER_CONFIGURATION_NAME, "false");
SerializableRunnable stopLocator = getStopLocatorRunnable();
try {
final String uname = getUniqueName();
- File logFile = new File(uname + "-locator1-" + port1 + ".log");
+ File logFile = new File("");
locator = Locator.startLocatorAndDS(port1, logFile, properties);
DistributedSystem sys = locator.getDistributedSystem();
locvm.invoke(new SerializableRunnable() {
public void run() {
- File lf = new File(uname + "-locator2-" + port2 + ".log");
+ File lf = new File("");
try {
Locator.startLocatorAndDS(port2, lf, properties);
}
@@ -932,7 +935,6 @@ public class LocatorDUnitTest extends DistributedTestCase {
Object[] connectArgs = new Object[]{ properties };
- MembershipTestHook testHook;
SerializableRunnable crashSystem =
new SerializableRunnable("Crash system") {
public void run() {
@@ -941,7 +943,13 @@ public class LocatorDUnitTest extends DistributedTestCase {
msys.getLogWriter().info("<ExpectedException action=add>com.gemstone.gemfire.ConnectException</ExpectedException>");
msys.getLogWriter().info("<ExpectedException action=add>com.gemstone.gemfire.ForcedDisconnectException</ExpectedException>");
msys.getLogWriter().info("<ExpectedException action=add>Possible loss of quorum</ExpectedException>");
- MembershipManagerHelper.crashDistributedSystem(msys);
+ hook = new TestHook();
+ MembershipManagerHelper.getMembershipManager(msys).registerTestHook(hook);
+ try {
+ MembershipManagerHelper.crashDistributedSystem(msys);
+ } finally {
+ hook.reset();
+ }
}
};
@@ -988,32 +996,25 @@ public class LocatorDUnitTest extends DistributedTestCase {
msys.getLogWriter().info("<ExpectedException action=add>Membership: requesting removal of </ExpectedException>");
mmgr.requestMemberRemoval(mem1, "test reasons");
msys.getLogWriter().info("<ExpectedException action=remove>Membership: requesting removal of </ExpectedException>");
-
fail("It should have thrown exception in requestMemberRemoval");
} catch (DistributedSystemDisconnectedException e) {
Throwable cause = e.getCause();
assertTrue(
"This should have been ForceDisconnectException but found "
+ cause, cause instanceof ForcedDisconnectException);
+ } finally {
+ hook.reset();
}
}
});
- locvm.invoke(stopLocator);
- // stop the locator normally. This should also be okay
- locator.stop();
-
- if (!Locator.getLocators().isEmpty()) {
- // log this for debugging purposes before throwing assertion error
- getLogWriter().warning("found locator " + Locator.getLocators().iterator().next());
- }
- assertTrue("locator is not stopped", Locator.getLocators().isEmpty());
}
finally {
if (locator != null) {
locator.stop();
}
locvm.invoke(stopLocator);
+ assertTrue("locator is not stopped", Locator.getLocators().isEmpty());
}
}
@@ -1071,7 +1072,7 @@ public class LocatorDUnitTest extends DistributedTestCase {
final String uname = getUniqueName();
locvm.invoke(new SerializableRunnable() {
public void run() {
- File lf = new File(uname + "-locator1-" + port1 + ".log");
+ File lf = new File("");
try {
Locator.startLocatorAndDS(port2, lf, properties);
}
@@ -1081,7 +1082,7 @@ public class LocatorDUnitTest extends DistributedTestCase {
}
});
- File logFile = new File(uname + "-locator2-" + port2 + ".log");
+ File logFile = new File("");
locator = Locator.startLocatorAndDS(port1, logFile, properties);
DistributedSystem sys = locator.getDistributedSystem();
sys.getLogWriter().info("<ExpectedException action=add>com.gemstone.gemfire.ForcedDisconnectException</ExpectedException>");
@@ -1223,8 +1224,7 @@ public class LocatorDUnitTest extends DistributedTestCase {
vm0.invoke(new SerializableRunnable("Start locator " + locators) {
public void run() {
- File logFile = new File(uniqueName + "-locator" + port
- + ".log");
+ File logFile = new File("");
try {
Properties locProps = new Properties();
locProps.setProperty("mcast-port", "0");
@@ -1466,8 +1466,7 @@ public class LocatorDUnitTest extends DistributedTestCase {
vm0.invoke(new SerializableRunnable("Start locator on " + port1) {
public void run() {
- File logFile = new File(uniqueName + "-locator" + port1
- + ".log");
+ File logFile = new File("");
try {
Locator.startLocatorAndDS(port1, logFile, dsProps);
} catch (IOException ex) {
@@ -1481,8 +1480,7 @@ public class LocatorDUnitTest extends DistributedTestCase {
vm3.invoke(new SerializableRunnable("Start locator on " + port2) {
public void run() {
- File logFile = new File(uniqueName + "-locator" +
- port2 + ".log");
+ File logFile = new File("");
try {
Locator.startLocatorAndDS(port2, logFile, dsProps);
@@ -1579,8 +1577,7 @@ public class LocatorDUnitTest extends DistributedTestCase {
vm0.invoke(new SerializableRunnable("Start locator on " + port1) {
public void run() {
- File logFile = new File(uniqueName + "-locator" + port1
- + ".log");
+ File logFile = new File("");
try {
Properties props = new Properties();
props.setProperty("mcast-port", String.valueOf(mcastport));
@@ -1597,8 +1594,7 @@ public class LocatorDUnitTest extends DistributedTestCase {
});
vm3.invoke(new SerializableRunnable("Start locator on " + port2) {
public void run() {
- File logFile = new File(uniqueName + "-locator" +
- port2 + ".log");
+ File logFile = new File("");
try {
Properties props = new Properties();
props.setProperty("mcast-port", String.valueOf(mcastport));
@@ -1682,7 +1678,7 @@ public class LocatorDUnitTest extends DistributedTestCase {
port1 =
AvailablePort.getRandomAvailablePort(AvailablePort.SOCKET);
deleteLocatorStateFile(port1);
- File logFile = new File(getUniqueName() + "-locator" + port1 + ".log");
+ File logFile = new File("");
Locator locator = Locator.startLocator(port1, logFile);
try {
@@ -1716,21 +1712,21 @@ public class LocatorDUnitTest extends DistributedTestCase {
final Properties properties = new Properties();
properties.put("mcast-port", "0");
properties.put("locators", locators);
- properties.put("enable-network-partition-detection", "true");
+ properties.put(DistributionConfig.ENABLE_NETWORK_PARTITION_DETECTION_NAME, "true");
properties.put("disable-auto-reconnect", "true");
properties.put(DistributionConfig.ENABLE_CLUSTER_CONFIGURATION_NAME, "false");
- File logFile = new File(getUniqueName() + "-locatorA-" + port1 + ".log");
+ File logFile = new File("");
locator = Locator.startLocatorAndDS(port1, logFile, properties);
final Properties properties2 = new Properties();
properties2.put("mcast-port", "0");
properties2.put("locators", locators);
- properties2.put("enable-network-partition-detection", "false");
+ properties2.put(DistributionConfig.ENABLE_NETWORK_PARTITION_DETECTION_NAME, "false");
properties2.put(DistributionConfig.ENABLE_CLUSTER_CONFIGURATION_NAME, "false");
properties.put("disable-auto-reconnect", "true");
- vm1.invoke(new SerializableRunnable("try, but fail to connect") {
+ vm1.invoke(new SerializableRunnable("try to connect") {
public void run() {
DistributedSystem s = null;
try {
@@ -1750,20 +1746,18 @@ public class LocatorDUnitTest extends DistributedTestCase {
locator.stop();
// now start the locator with enable-network-partition-detection=false
- logFile = new File(getUniqueName() + "-locatorB-" + port1 + ".log");
+ logFile = new File("");
locator = Locator.startLocatorAndDS(port1, logFile , properties2);
- vm1.invoke(new SerializableRunnable("try, but fail to connect(2)") {
+ vm1.invoke(new SerializableRunnable("try to connect") {
public void run() {
DistributedSystem s = null;
- try {
- s = DistributedSystem.connect(properties);
- s.disconnect();
+ s = DistributedSystem.connect(properties);
+ boolean enabled = ((InternalDistributedSystem)s).getConfig().getEnableNetworkPartitionDetection();
+ s.disconnect();
+ if (enabled) {
fail("should not have been able to connect with different enable-network-partition-detection settings");
}
- catch (GemFireConfigException e) {
- // passed
- }
}
});
@@ -1788,14 +1782,14 @@ public class LocatorDUnitTest extends DistributedTestCase {
port1 =
AvailablePort.getRandomAvailablePort(AvailablePort.SOCKET);
- File logFile1 = new File(getUniqueName() + "-locator" + port1 + ".log");
+ File logFile1 = new File("");
Locator locator1 = Locator.startLocator(port1, logFile1);
try {
int port2 =
AvailablePort.getRandomAvailablePort(AvailablePort.SOCKET);
- File logFile2 = new File(getUniqueName() + "-locator" + port2 + ".log");
+ File logFile2 = new File("");
deleteLocatorStateFile(port1, port2);
@@ -1850,7 +1844,7 @@ public class LocatorDUnitTest extends DistributedTestCase {
port1 =
AvailablePort.getRandomAvailablePort(AvailablePort.SOCKET);
deleteLocatorStateFile(port1);
- File logFile = new File(getUniqueName() + "-locator" + port1 + ".log");
+ File logFile = new File("");
File stateFile = new File("locator"+port1+"state.dat");
VM vm0 = Host.getHost(0).getVM(0);
final Properties p = new Properties();
@@ -1928,8 +1922,7 @@ public class LocatorDUnitTest extends DistributedTestCase {
private SerializableRunnable getStartSBLocatorRunnable(final int port, final String name) {
return new SerializableRunnable("Start locator on port " + port) {
public void run() {
- File logFile = new File(name + "-locator" + port
- + ".log");
+ File logFile = new File("");
try {
System.setProperty("gemfire.disable-floating-coordinator", "true");
System.setProperty("p2p.joinTimeout", "1000");
@@ -1962,6 +1955,31 @@ public class LocatorDUnitTest extends DistributedTestCase {
}
+ //New test hook which blocks before closing channel.
+ class TestHook implements MembershipTestHook {
+
+ volatile boolean unboundedWait = true;
+ @Override
+ public void beforeMembershipFailure(String reason, Throwable cause) {
+ System.out.println("Inside TestHook.beforeMembershipFailure with " + cause);
+ long giveUp = System.currentTimeMillis() + 30000;
+ if (cause instanceof ForcedDisconnectException) {
+ while (unboundedWait && System.currentTimeMillis() < giveUp) {
+ pause(1000);
+ }
+ }
+ System.out.println("TestHook exiting");
+ }
+
+ @Override
+ public void afterMembershipFailure(String reason, Throwable cause) {
+ }
+
+ public void reset() {
+ unboundedWait = false;
+ }
+
+ }
class MyMembershipListener implements MembershipListener {
boolean quorumLostInvoked;
http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/01786106/gemfire-core/src/test/java/com/gemstone/gemfire/distributed/internal/membership/gms/membership/GMSHealthMonitorJUnitTest.java
----------------------------------------------------------------------
diff --git a/gemfire-core/src/test/java/com/gemstone/gemfire/distributed/internal/membership/gms/membership/GMSHealthMonitorJUnitTest.java b/gemfire-core/src/test/java/com/gemstone/gemfire/distributed/internal/membership/gms/membership/GMSHealthMonitorJUnitTest.java
index 3d05b89..e7962a3 100644
--- a/gemfire-core/src/test/java/com/gemstone/gemfire/distributed/internal/membership/gms/membership/GMSHealthMonitorJUnitTest.java
+++ b/gemfire-core/src/test/java/com/gemstone/gemfire/distributed/internal/membership/gms/membership/GMSHealthMonitorJUnitTest.java
@@ -30,6 +30,7 @@ import com.gemstone.gemfire.distributed.internal.membership.gms.messages.PingRes
import com.gemstone.gemfire.distributed.internal.membership.gms.messages.RemoveMemberMessage;
import com.gemstone.gemfire.distributed.internal.membership.gms.messages.SuspectMembersMessage;
import com.gemstone.gemfire.distributed.internal.membership.gms.messages.SuspectRequest;
+import com.gemstone.gemfire.internal.SocketCreator;
import com.gemstone.gemfire.test.junit.categories.UnitTest;
@Category(UnitTest.class)
@@ -83,9 +84,11 @@ public class GMSHealthMonitorJUnitTest {
public void testHMServiceStarted() throws IOException {
MethodExecuted messageSent = new MethodExecuted();
+ InternalDistributedMember mbr = new InternalDistributedMember(SocketCreator.getLocalHost(), 12345);
+ when(messenger.getMemberID()).thenReturn(mbr);
when(messenger.send(any(PingResponseMessage.class))).thenAnswer(messageSent);
- gmsHealthMonitor.processMessage(new PingRequestMessage(1));
+ gmsHealthMonitor.processMessage(new PingRequestMessage(mbr, 1));
Assert.assertTrue("Ping Response should have been sent", messageSent.isMethodExecuted());
}
http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/01786106/gemfire-core/src/test/java/dunit/DistributedTestCase.java
----------------------------------------------------------------------
diff --git a/gemfire-core/src/test/java/dunit/DistributedTestCase.java b/gemfire-core/src/test/java/dunit/DistributedTestCase.java
index 56eb825..058980f 100755
--- a/gemfire-core/src/test/java/dunit/DistributedTestCase.java
+++ b/gemfire-core/src/test/java/dunit/DistributedTestCase.java
@@ -425,7 +425,7 @@ public abstract class DistributedTestCase extends TestCase implements java.io.Se
if (!p.contains(DistributionConfig.DISABLE_AUTO_RECONNECT_NAME)) {
p.put(DistributionConfig.DISABLE_AUTO_RECONNECT_NAME, "true");
}
-
+
for (Iterator iter = props.entrySet().iterator();
iter.hasNext(); ) {
Map.Entry entry = (Map.Entry) iter.next();
@@ -1349,7 +1349,7 @@ public abstract class DistributedTestCase extends TestCase implements java.io.Se
*/
public void deleteLocatorStateFile(int... ports) {
for (int i=0; i<ports.length; i++) {
- File stateFile = new File("locator"+ports[i]+"state.dat");
+ File stateFile = new File("locator"+ports[i]+"view.dat");
if (stateFile.exists()) {
stateFile.delete();
}