Posted to commits@geode.apache.org by bs...@apache.org on 2015/09/03 01:42:48 UTC

incubator-geode git commit: GEODE-77 bug fixes for LocatorDUnitTest

Repository: incubator-geode
Updated Branches:
  refs/heads/feature/GEODE-77 6bf1f2094 -> 017861064


GEODE-77 bug fixes for LocatorDUnitTest

All tests in LocatorDUnitTest now pass except the quorum check,
which needs a test hook.  GMSJoinLeaveJUnitTest, GMSHealthMonitorJUnitTest
(except for testSuspectMembersCalledThroughSuspectThread) and
MembershipJUnitTest are all passing with these changes.


Project: http://git-wip-us.apache.org/repos/asf/incubator-geode/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-geode/commit/01786106
Tree: http://git-wip-us.apache.org/repos/asf/incubator-geode/tree/01786106
Diff: http://git-wip-us.apache.org/repos/asf/incubator-geode/diff/01786106

Branch: refs/heads/feature/GEODE-77
Commit: 0178610645178cc995ca60992d00368a266f1378
Parents: 6bf1f20
Author: Bruce Schuchardt <bs...@pivotal.io>
Authored: Wed Sep 2 16:41:26 2015 -0700
Committer: Bruce Schuchardt <bs...@pivotal.io>
Committed: Wed Sep 2 16:42:33 2015 -0700

----------------------------------------------------------------------
 .../internal/DistributionManager.java           |   2 -
 .../internal/membership/NetMember.java          |   2 +
 .../internal/membership/NetView.java            |  10 +-
 .../internal/membership/gms/ServiceConfig.java  |   2 +
 .../internal/membership/gms/Services.java       |  27 ++-
 .../membership/gms/fd/GMSHealthMonitor.java     |  75 +++++---
 .../membership/gms/locator/GMSLocator.java      |   8 +-
 .../membership/gms/membership/GMSJoinLeave.java |  84 ++++++---
 .../gms/messages/PingRequestMessage.java        |  12 +-
 .../gms/messages/PingResponseMessage.java       |   2 +-
 .../gms/messenger/AddressManager.java           |   4 +-
 .../gms/messenger/JGroupsMessenger.java         |  11 +-
 .../gms/mgr/GMSMembershipManager.java           |  13 +-
 .../gemfire/distributed/LocatorDUnitTest.java   | 178 ++++++++++---------
 .../membership/GMSHealthMonitorJUnitTest.java   |   5 +-
 .../test/java/dunit/DistributedTestCase.java    |   4 +-
 16 files changed, 272 insertions(+), 167 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/01786106/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/DistributionManager.java
----------------------------------------------------------------------
diff --git a/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/DistributionManager.java b/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/DistributionManager.java
index e8fe361..a6a425b 100644
--- a/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/DistributionManager.java
+++ b/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/DistributionManager.java
@@ -629,8 +629,6 @@ public class DistributionManager
         logger.info(LogMarker.DM, LocalizedMessage.create(
             LocalizedStrings.DistributionManager_DISTRIBUTIONMANAGER_0_STARTED_ON_1_THERE_WERE_2_OTHER_DMS_3_4_5, logArgs));
 
-logger.info("My ID is {}", Integer.toHexString(System.identityHashCode(dm.getDistributionManagerId())));
-        
         MembershipLogger.logStartup(dm.getDistributionManagerId());
       }
       return dm;

http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/01786106/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/NetMember.java
----------------------------------------------------------------------
diff --git a/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/NetMember.java b/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/NetMember.java
index 8966e0f..a110ff2 100755
--- a/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/NetMember.java
+++ b/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/NetMember.java
@@ -42,6 +42,8 @@ public interface NetMember
    */
   public boolean splitBrainEnabled();
   
+  public void setSplitBrainEnabled(boolean enabled);
+  
   /**
    * return a flag stating whether the member can be the membership coordinator
    * @since 5.6

http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/01786106/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/NetView.java
----------------------------------------------------------------------
diff --git a/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/NetView.java b/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/NetView.java
index 46ac284..3ebae8f 100644
--- a/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/NetView.java
+++ b/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/NetView.java
@@ -205,16 +205,17 @@ public class NetView implements DataSerializableFixedID {
   }
 
   /***
-   * This functions returns the list of all preferred coordinators.
+   * This function returns the list of preferred coordinators.
    * It also includes one random member taken from the non-preferred
    * member list, making sure that member is not in the suspect set,
    * and the local member.
    * 
    * @param filter Suspect member set.
    * @param localAddress
+   * @param maxNumberDesired number of preferred coordinators to return
    * @return list of preferred coordinators
    */
-  public List<InternalDistributedMember> getAllPreferredCoordinators(Set<InternalDistributedMember> filter, InternalDistributedMember localAddress) {
+  public List<InternalDistributedMember> getPreferredCoordinators(Set<InternalDistributedMember> filter, InternalDistributedMember localAddress, int maxNumberDesired) {
     List<InternalDistributedMember> results = new ArrayList<InternalDistributedMember>();
     List<InternalDistributedMember> notPreferredCoordinatorList = new ArrayList<InternalDistributedMember>();
 
@@ -224,7 +225,10 @@ public class NetView implements DataSerializableFixedID {
           continue; // the local member is added separately
         }
         if (addr.getNetMember().preferredForCoordinator()) {
-          results.add(addr);// add all preferred coordinator
+          results.add(addr);
+          if (results.size() >= maxNumberDesired) {
+            break;
+          }
         } else if (!filter.contains(addr)) {
           notPreferredCoordinatorList.add(addr);
         }
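
For context, a minimal sketch of how a caller might use the renamed method; the pattern and the limit of 5 mirror the GMSHealthMonitor and GMSJoinLeave changes further down, while the variable names (view, localAddress, suspectedMember) are placeholders:

    // choose recipients for a suspect/removal message, skipping the suspected
    // member itself and capping the number of preferred coordinators at 5
    Set<InternalDistributedMember> suspects = new HashSet<>();
    suspects.add(suspectedMember);
    List<InternalDistributedMember> recipients =
        view.getPreferredCoordinators(suspects, localAddress, 5);
    // per the javadoc above, the result also holds the local member and one
    // random non-preferred member that is not in the suspect set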

http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/01786106/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/ServiceConfig.java
----------------------------------------------------------------------
diff --git a/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/ServiceConfig.java b/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/ServiceConfig.java
index d6b4d47..c66e262 100755
--- a/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/ServiceConfig.java
+++ b/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/ServiceConfig.java
@@ -135,6 +135,8 @@ public class ServiceConfig {
     
     memberWeight = Integer.getInteger("gemfire.member-weight", 0);
     locatorWaitTime = theConfig.getLocatorWaitTime();
+    
+    networkPartitionDetectionEnabled = theConfig.getEnableNetworkPartitionDetection();
   }
 
 }
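
ServiceConfig now caches the flag from the distribution config at startup; the GMSJoinLeave change further down overwrites it with the coordinator's setting when a join response arrives. A condensed sketch of that relationship, with "o" standing for the member ID carried in the join response as in that diff:

    // at startup, taken from the DistributionConfig
    networkPartitionDetectionEnabled = theConfig.getEnableNetworkPartitionDetection();

    // on a successful join, adopt the coordinator's setting so both copies agree
    services.getConfig().getDistributionConfig()
        .setEnableNetworkPartitionDetection(o.isSplitBrainEnabled());
    services.getConfig().setNetworkPartitionDetectionEnabled(o.isSplitBrainEnabled());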

http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/01786106/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/Services.java
----------------------------------------------------------------------
diff --git a/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/Services.java b/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/Services.java
index aa7bc1e..efc95ac 100755
--- a/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/Services.java
+++ b/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/Services.java
@@ -7,6 +7,7 @@ import org.apache.logging.log4j.Logger;
 import com.gemstone.gemfire.CancelCriterion;
 import com.gemstone.gemfire.CancelException;
 import com.gemstone.gemfire.LogWriter;
+import com.gemstone.gemfire.distributed.DistributedSystemDisconnectedException;
 import com.gemstone.gemfire.distributed.internal.DMStats;
 import com.gemstone.gemfire.distributed.internal.DistributionConfig;
 import com.gemstone.gemfire.distributed.internal.InternalLocator;
@@ -149,6 +150,18 @@ public class Services {
   }
   
   public void emergencyClose() {
+    logger.info("Membership: stopping services");
+    if (stopping) {
+      return;
+    }
+    stopping = true;
+    this.joinLeave.emergencyClose();
+    this.healthMon.emergencyClose();
+    this.auth.emergencyClose();
+    this.messenger.emergencyClose();
+    this.manager.emergencyClose();
+    this.timer.cancel();
+    this.cancelCriterion.cancel("Membership services are shut down");
   }
   
   public void stop() {
@@ -262,18 +275,14 @@ public class Services {
         return null;
       }
       else {
-        return new ServicesStoppedException(reasonForStopping, e);
+        if (e == null) {
+          return new DistributedSystemDisconnectedException(reasonForStopping);
+        } else {
+          return new DistributedSystemDisconnectedException(reasonForStopping, e);
+        }
       }
     }
     
   }
   
-  public static class ServicesStoppedException extends CancelException {
-    private static final long serialVersionUID = 2134474966059876801L;
-
-    public ServicesStoppedException(String message, Throwable cause) {
-      super(message, cause);
-    }
-  }
-
 }
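
With ServicesStoppedException removed, callers that used to see it now get the standard DistributedSystemDisconnectedException. A hedged sketch of the caller-side pattern, matching the catch blocks added to GMSHealthMonitor and GMSJoinLeave below ("services" and "message" are placeholders):

    try {
      services.getMessenger().send(message);
    } catch (DistributedSystemDisconnectedException e) {
      // membership services have been stopped -- give up quietly
      // instead of reporting an unexpected error
      return;
    }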

http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/01786106/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/fd/GMSHealthMonitor.java
----------------------------------------------------------------------
diff --git a/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/fd/GMSHealthMonitor.java b/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/fd/GMSHealthMonitor.java
index babf9e3..84f537d 100755
--- a/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/fd/GMSHealthMonitor.java
+++ b/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/fd/GMSHealthMonitor.java
@@ -6,7 +6,6 @@ import static com.gemstone.gemfire.internal.DataSerializableFixedID.SUSPECT_MEMB
 
 import java.util.ArrayList;
 import java.util.Collection;
-import java.util.Date;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Iterator;
@@ -25,22 +24,17 @@ import java.util.concurrent.atomic.AtomicInteger;
 import org.apache.logging.log4j.Logger;
 
 import com.gemstone.gemfire.distributed.DistributedMember;
+import com.gemstone.gemfire.distributed.DistributedSystemDisconnectedException;
 import com.gemstone.gemfire.distributed.internal.DistributionMessage;
 import com.gemstone.gemfire.distributed.internal.membership.InternalDistributedMember;
 import com.gemstone.gemfire.distributed.internal.membership.NetView;
 import com.gemstone.gemfire.distributed.internal.membership.gms.Services;
 import com.gemstone.gemfire.distributed.internal.membership.gms.interfaces.HealthMonitor;
 import com.gemstone.gemfire.distributed.internal.membership.gms.interfaces.MessageHandler;
-import com.gemstone.gemfire.distributed.internal.membership.gms.messages.InstallViewMessage;
-import com.gemstone.gemfire.distributed.internal.membership.gms.messages.JoinRequestMessage;
-import com.gemstone.gemfire.distributed.internal.membership.gms.messages.JoinResponseMessage;
-import com.gemstone.gemfire.distributed.internal.membership.gms.messages.LeaveRequestMessage;
 import com.gemstone.gemfire.distributed.internal.membership.gms.messages.PingRequestMessage;
 import com.gemstone.gemfire.distributed.internal.membership.gms.messages.PingResponseMessage;
-import com.gemstone.gemfire.distributed.internal.membership.gms.messages.RemoveMemberMessage;
 import com.gemstone.gemfire.distributed.internal.membership.gms.messages.SuspectMembersMessage;
 import com.gemstone.gemfire.distributed.internal.membership.gms.messages.SuspectRequest;
-import com.gemstone.gemfire.distributed.internal.membership.gms.messages.ViewAckMessage;
 
 /**
  * Failure Detection
@@ -173,10 +167,20 @@ public class GMSHealthMonitor implements HealthMonitor, MessageHandler {
       GMSHealthMonitor.this.currentTimeStamp = currentTime;
 
       if (neighbour != null) {
-        CustomTimeStamp nextNeigbourTS = GMSHealthMonitor.this.memberVsLastMsgTS.get(neighbour);
+        CustomTimeStamp nextNeighbourTS;
+        synchronized(GMSHealthMonitor.this) {
+          nextNeighbourTS = GMSHealthMonitor.this.memberVsLastMsgTS.get(neighbour);
+        }
 
+        if (nextNeighbourTS == null) {
+          CustomTimeStamp customTS = new CustomTimeStamp();
+          customTS.setTimeStamp(System.currentTimeMillis());
+          memberVsLastMsgTS.put(neighbour, customTS);
+          return;
+        }
+        
         long interval = memberTimeoutInMillis / GMSHealthMonitor.LOGICAL_INTERVAL;
-        long lastTS = currentTime - nextNeigbourTS.getTimeStamp();
+        long lastTS = currentTime - nextNeighbourTS.getTimeStamp();
         if (lastTS + interval >= memberTimeoutInMillis) {
           logger.debug("Checking member {} ", neighbour);
           // now do check request for this member;
@@ -206,7 +210,7 @@ public class GMSHealthMonitor implements HealthMonitor, MessageHandler {
 
   private PingRequestMessage constructPingRequestMessage(final InternalDistributedMember pingMember) {
     final int reqId = requestId.getAndIncrement();
-    final PingRequestMessage prm = new PingRequestMessage(reqId);
+    final PingRequestMessage prm = new PingRequestMessage(pingMember, reqId);
     prm.setRecipient(pingMember);
 
     return prm;
@@ -228,7 +232,7 @@ public class GMSHealthMonitor implements HealthMonitor, MessageHandler {
           String reason = String.format("Member isn't responding to check message: %s", pingMember);
           GMSHealthMonitor.this.sendSuspectMessage(pingMember, reason);
         } else {
-          logger.debug("Setting next neighbour as member {} not responded.", pingMember);
+          logger.debug("Setting next neighbour as member {} has not responded.", pingMember);
           // back to previous one
           setNextNeighbour(GMSHealthMonitor.this.currentView, null);
         }
@@ -238,7 +242,7 @@ public class GMSHealthMonitor implements HealthMonitor, MessageHandler {
   }
 
   private void sendSuspectMessage(InternalDistributedMember mbr, String reason) {
-    logger.debug(reason);
+    logger.debug("Suspecting {} reason=\"{}\"", mbr, reason);
     SuspectRequest sr = new SuspectRequest(mbr, reason);
     List<SuspectRequest> sl = new ArrayList<SuspectRequest>();
     sl.add(sr);
@@ -253,7 +257,7 @@ public class GMSHealthMonitor implements HealthMonitor, MessageHandler {
    */
   private boolean doCheckMember(InternalDistributedMember pingMember) {
     //TODO: need to do a tcp check
-    logger.debug("Checking the member: {}", pingMember);
+    logger.debug("Checking member {}", pingMember);
     final PingRequestMessage prm = constructPingRequestMessage(pingMember);
     final Response pingResp = new Response();
     requestIdVsResponse.put(prm.getRequestId(), pingResp);
@@ -294,7 +298,7 @@ public class GMSHealthMonitor implements HealthMonitor, MessageHandler {
     synchronized (suspectRequests) {
       SuspectRequest sr = new SuspectRequest((InternalDistributedMember) mbr, reason);
       if (!suspectRequests.contains(sr)) {
-        logger.debug("Adding member {} to suspect for reason {}.", mbr, reason);
+        logger.info("Suspecting member {}. Reason= {}.", mbr, reason);
         suspectRequests.add(sr);
         suspectRequests.notify();
       }
@@ -362,8 +366,8 @@ public class GMSHealthMonitor implements HealthMonitor, MessageHandler {
     synchronized (viewVsSuspectedMembers) {
       viewVsSuspectedMembers.clear();
     }
+    setNextNeighbour(newView, null);
     currentView = newView;
-    setNextNeighbour(currentView, null);
   }
 
   /***
@@ -381,11 +385,13 @@ public class GMSHealthMonitor implements HealthMonitor, MessageHandler {
     }
     boolean sameView = false;
 
-    if (newView.equals(currentView)) {
+    if (currentView != null &&
+        newView.getCreator().equals(currentView.getCreator()) &&
+        newView.getViewId() == currentView.getViewId()) {
       sameView = true;
     }
 
-    List<InternalDistributedMember> allMembers = currentView.getMembers();
+    List<InternalDistributedMember> allMembers = newView.getMembers();
     int index = allMembers.indexOf(nextTo);
     if (index != -1) {
       int nextNeighbourIndex = (index + 1) % allMembers.size();
@@ -394,15 +400,16 @@ public class GMSHealthMonitor implements HealthMonitor, MessageHandler {
     }
 
     if (!sameView || memberVsLastMsgTS.size() == 0) {
+      
       if (memberVsLastMsgTS.size() > 0) {
         memberVsLastMsgTS.clear();
       }
 
       long cts = System.currentTimeMillis();
-      for (int i = 0; i < allMembers.size(); i++) {
+      for (InternalDistributedMember mbr: allMembers) {
         CustomTimeStamp customTS = new CustomTimeStamp();
         customTS.setTimeStamp(cts);
-        memberVsLastMsgTS.put(allMembers.get(i), customTS);
+        memberVsLastMsgTS.put(mbr, customTS);
       }
     }
   }
@@ -510,12 +517,18 @@ public class GMSHealthMonitor implements HealthMonitor, MessageHandler {
   }
 
   private void processPingRequest(PingRequestMessage m) {
-    PingResponseMessage prm = new PingResponseMessage(m.getRequestId());
-    prm.setRecipient(m.getSender());
-    Set<InternalDistributedMember> membersNotReceivedMsg = services.getMessenger().send(prm);
-    // TODO: send is throwing exception right now
-    if (membersNotReceivedMsg != null && membersNotReceivedMsg.contains(m.getSender())) {
-      logger.debug("Unable to send check response to member: {}", m.getSender());
+    // only respond if the intended recipient is this member
+    InternalDistributedMember me = services.getMessenger().getMemberID();
+    if (me.getVmViewId() < 0 || m.getTarget().equals(me)) {
+      PingResponseMessage prm = new PingResponseMessage(m.getRequestId());
+      prm.setRecipient(m.getSender());
+      Set<InternalDistributedMember> membersNotReceivedMsg = services.getMessenger().send(prm);
+      // TODO: send is throwing exception right now
+      if (membersNotReceivedMsg != null && membersNotReceivedMsg.contains(m.getSender())) {
+        logger.debug("Unable to send check response to member: {}", m.getSender());
+      }
+    } else {
+      logger.debug("Ignoring ping request intended for {}.  My ID is {}", m.getTarget(), me);
     }
   }
 
@@ -540,7 +553,6 @@ public class GMSHealthMonitor implements HealthMonitor, MessageHandler {
    */
   private void processSuspectMembersRequest(SuspectMembersMessage incomingRequest) {
     NetView cv = currentView;
-    logger.debug("GMSHealthMonitor.processSuspectMembersRequest invoked for members {}", incomingRequest);
 
     if (cv == null) {
       return;
@@ -638,6 +650,8 @@ public class GMSHealthMonitor implements HealthMonitor, MessageHandler {
               if (!pinged) {
                 GMSHealthMonitor.this.services.getJoinLeave().remove(mbr, reason);
               }
+            } catch (DistributedSystemDisconnectedException e) {
+              return;
             } catch (Exception e) {
               logger.info("Unexpected exception while verifying member", e);
             } finally {
@@ -759,10 +773,15 @@ public class GMSHealthMonitor implements HealthMonitor, MessageHandler {
     for (int i = 0; i < requests.size(); i++) {
       filter.add(requests.get(i).getSuspectMember());
     }
-    List<InternalDistributedMember> recipients = currentView.getAllPreferredCoordinators(filter, services.getJoinLeave().getMemberID());
+    List<InternalDistributedMember> recipients = currentView.getPreferredCoordinators(filter, services.getJoinLeave().getMemberID(), 5);
 
     SuspectMembersMessage rmm = new SuspectMembersMessage(recipients, requests);
-    Set<InternalDistributedMember> failedRecipients = services.getMessenger().send(rmm);
+    Set<InternalDistributedMember> failedRecipients;
+    try {
+      failedRecipients = services.getMessenger().send(rmm);
+    } catch (DistributedSystemDisconnectedException e) {
+      return;
+    }
 
     if (failedRecipients != null && failedRecipients.size() > 0) {
       logger.info("Unable to send suspect message to {}", recipients);

http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/01786106/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/locator/GMSLocator.java
----------------------------------------------------------------------
diff --git a/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/locator/GMSLocator.java b/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/locator/GMSLocator.java
index d9cfb36..66aa0a9 100755
--- a/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/locator/GMSLocator.java
+++ b/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/locator/GMSLocator.java
@@ -127,8 +127,8 @@ public class GMSLocator implements Locator, NetLocator {
   public Object processRequest(Object request) throws IOException {
     Object response = null;
     
-    if (logger.isInfoEnabled()) {
-      logger.info("Peer locator processing {}", request);
+    if (logger.isDebugEnabled()) {
+      logger.debug("Peer locator processing {}", request);
     }
     
     if (request instanceof GetViewRequest) {
@@ -151,7 +151,7 @@ public class GMSLocator implements Locator, NetLocator {
         
         if (view != null) {
           coord = view.getCoordinator(findRequest.getRejectedCoordinators());
-          logger.info("Peer locator: coordinator from view is {}", coord);
+          logger.debug("Peer locator: coordinator from view is {}", coord);
           fromView = true;
         }
         
@@ -180,7 +180,7 @@ public class GMSLocator implements Locator, NetLocator {
                 }
               }
             }
-            logger.info("Peer locator: coordinator from registrations is {}", coord);
+            logger.debug("Peer locator: coordinator from registrations is {}", coord);
           }
         }
         response = new FindCoordinatorResponse(coord, fromView,

http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/01786106/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/membership/GMSJoinLeave.java
----------------------------------------------------------------------
diff --git a/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/membership/GMSJoinLeave.java b/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/membership/GMSJoinLeave.java
index 9414dfa..d852e1b 100755
--- a/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/membership/GMSJoinLeave.java
+++ b/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/membership/GMSJoinLeave.java
@@ -34,6 +34,7 @@ import org.apache.logging.log4j.Logger;
 import com.gemstone.gemfire.GemFireConfigException;
 import com.gemstone.gemfire.SystemConnectException;
 import com.gemstone.gemfire.distributed.DistributedMember;
+import com.gemstone.gemfire.distributed.DistributedSystemDisconnectedException;
 import com.gemstone.gemfire.distributed.internal.DistributionConfig;
 import com.gemstone.gemfire.distributed.internal.DistributionManager;
 import com.gemstone.gemfire.distributed.internal.DistributionMessage;
@@ -238,6 +239,8 @@ public class GMSJoinLeave implements JoinLeave, MessageHandler {
         me.setBirthViewId(birthViewId);
         me.setSplitBrainEnabled(o.isSplitBrainEnabled());
         me.setPreferredForCoordinator(o.preferredForCoordinator());
+        services.getConfig().getDistributionConfig().setEnableNetworkPartitionDetection(o.isSplitBrainEnabled());
+        services.getConfig().setNetworkPartitionDetectionEnabled(o.isSplitBrainEnabled());
         installView(response.getCurrentView());
         return true;
       } else {
@@ -346,10 +349,7 @@ public class GMSJoinLeave implements JoinLeave, MessageHandler {
    */
   private void processRemoveRequest(RemoveMemberMessage incomingRequest) {
     NetView v = currentView;
-    if (logger.isDebugEnabled()) {
-      logger.debug("JoinLeave.processRemoveRequest invoked.  isCoordinator="+isCoordinator+ "; isStopping="+isStopping
-          +"; cancelInProgress="+services.getCancelCriterion().isCancelInProgress());
-    }
+
     InternalDistributedMember mbr = incomingRequest.getMemberID();
 
     if (v != null  &&  !v.contains(incomingRequest.getSender())) {
@@ -358,7 +358,8 @@ public class GMSJoinLeave implements JoinLeave, MessageHandler {
     }
     
     logger.info("Membership received a request to remove " + mbr
-        + "; reason="+incomingRequest.getReason());
+        + " from " + incomingRequest.getSender()
+        + " reason="+incomingRequest.getReason());
 
     if (mbr.equals(this.localAddress)) {
       // oops - I've been kicked out
@@ -481,12 +482,16 @@ public class GMSJoinLeave implements JoinLeave, MessageHandler {
   boolean sendView(NetView view, Collection<InternalDistributedMember> newMembers, boolean preparing, ViewReplyProcessor rp) {
     int id = view.getViewId();
     InstallViewMessage msg = new InstallViewMessage(view, services.getAuthenticator().getCredentials(this.localAddress), preparing);
-    Set<InternalDistributedMember> recips = new HashSet<InternalDistributedMember>(view.getMembers());
+    Set<InternalDistributedMember> recips = new HashSet<>(view.getMembers());
 
     recips.removeAll(newMembers); // new members get the view in a JoinResponseMessage
     recips.remove(this.localAddress); // no need to send it to ourselves
 
-    recips.addAll(view.getCrashedMembers());
+    Set<InternalDistributedMember> responders = recips;
+    if (!view.getCrashedMembers().isEmpty()) {
+      recips = new HashSet<>(recips);
+      recips.addAll(view.getCrashedMembers());
+    }
 
     logger.info((preparing? "preparing" : "sending") + " new view " + view);
 
@@ -501,7 +506,7 @@ public class GMSJoinLeave implements JoinLeave, MessageHandler {
     }
     
     msg.setRecipients(recips);
-    rp.initialize(id, recips);
+    rp.initialize(id, responders);
     services.getMessenger().send(msg);
 
     // only wait for responses during preparation
@@ -879,13 +884,20 @@ public class GMSJoinLeave implements JoinLeave, MessageHandler {
   public void remove(InternalDistributedMember m, String reason) {
     NetView v = this.currentView;
     
-    if (v != null) {
+    services.getCancelCriterion().checkCancelInProgress(null);
+    
+    if (v != null && v.contains(m)) {
       Set<InternalDistributedMember> filter = new HashSet<>();
       filter.add(m);
-      RemoveMemberMessage msg = new RemoveMemberMessage(v.getAllPreferredCoordinators(filter, getMemberID()), 
+      RemoveMemberMessage msg = new RemoveMemberMessage(v.getPreferredCoordinators(filter, getMemberID(), 5), 
           m,
           reason);
-      services.getMessenger().send(msg);
+      if (this.isCoordinator) {
+        msg.setSender(this.localAddress);
+        processRemoveRequest(msg);
+      } else {
+        services.getMessenger().send(msg);
+      }
     }
   }
 
@@ -1024,6 +1036,9 @@ public class GMSJoinLeave implements JoinLeave, MessageHandler {
             &&  (services.getCancelCriterion().cancelInProgress() == null)) {
           try {
             synchronized(result) {
+              if (result.isEmpty() || this.conflictingView != null) {
+                break;
+              }
               result.wait(1000);
             }
           } catch (InterruptedException e) {
@@ -1123,7 +1138,11 @@ public class GMSJoinLeave implements JoinLeave, MessageHandler {
           } // synchronized
           if (requests != null  && !requests.isEmpty()) {
             logger.debug("View Creator is processing {} requests for the next membership view", requests.size());
-            /*boolean success = */createAndSendView(requests);
+            try {
+              createAndSendView(requests);
+            } catch (DistributedSystemDisconnectedException e) {
+              shutdown = true;
+            }
             requests = null;
           }
         }
@@ -1136,7 +1155,7 @@ public class GMSJoinLeave implements JoinLeave, MessageHandler {
      * Create a new membership view and send it to members (including crashed members).
      * Returns false if the view cannot be prepared successfully, true otherwise
      */
-    boolean createAndSendView(List<DistributionMessage> requests) {
+    void createAndSendView(List<DistributionMessage> requests) {
       List<InternalDistributedMember> joinReqs = new ArrayList<InternalDistributedMember>();
       List<InternalDistributedMember> leaveReqs = new ArrayList<InternalDistributedMember>();
       List<InternalDistributedMember> removalReqs = new ArrayList<InternalDistributedMember>();
@@ -1145,7 +1164,7 @@ public class GMSJoinLeave implements JoinLeave, MessageHandler {
       NetView oldView = currentView;
       List<InternalDistributedMember> oldMembers;
       if (oldView != null) {
-        oldMembers = oldView.getMembers();
+        oldMembers = new ArrayList<>(oldView.getMembers());
       } else {
         oldMembers = Collections.emptyList();
       }
@@ -1157,7 +1176,17 @@ public class GMSJoinLeave implements JoinLeave, MessageHandler {
         
         if (msg instanceof JoinRequestMessage) {
           mbr = ((JoinRequestMessage)msg).getMemberID();
-          if (!oldMembers.contains(mbr) && !joinReqs.contains(mbr)) {
+
+          boolean duplicate = false;
+          for (InternalDistributedMember m: oldMembers) {
+            // check the netMembers, which wildcards the
+            // viewID to detect old IDs still in the view
+            if (mbr.getNetMember().equals(m.getNetMember())) {
+              duplicate = true;
+              break;
+            }
+          }
+          if (!duplicate && !joinReqs.contains(mbr)) {
             joinReqs.add(mbr);
           }
         }
@@ -1179,6 +1208,14 @@ public class GMSJoinLeave implements JoinLeave, MessageHandler {
         }
       }
       
+      
+      
+      if (removalReqs.isEmpty() && leaveReqs.isEmpty() && joinReqs.isEmpty()) {
+        return;
+      }
+      
+      
+      
       NetView newView;
       synchronized(viewInstallationLock) {
         int viewNumber = 0;
@@ -1187,7 +1224,7 @@ public class GMSJoinLeave implements JoinLeave, MessageHandler {
           mbrs = new ArrayList<InternalDistributedMember>(joinReqs.size());
         } else {
           viewNumber = currentView.getViewId()+1;
-          mbrs = new ArrayList<InternalDistributedMember>(currentView.getMembers());
+          mbrs = new ArrayList<InternalDistributedMember>(oldMembers);
         }
         mbrs.addAll(joinReqs);
         mbrs.removeAll(leaveReqs);
@@ -1198,6 +1235,7 @@ public class GMSJoinLeave implements JoinLeave, MessageHandler {
       
       for (InternalDistributedMember mbr: joinReqs) {
         mbr.setVmViewId(newView.getViewId());
+        mbr.getNetMember().setSplitBrainEnabled(services.getConfig().isNetworkPartitionDetectionEnabled());
       }
       // send removal messages before installing the view so we stop
       // getting messages from members that have been kicked out
@@ -1207,7 +1245,7 @@ public class GMSJoinLeave implements JoinLeave, MessageHandler {
       // the new view
       if (newView.getMembers().equals(currentView.getMembers())) {
         logger.info("membership hasn't changed - aborting new view {}", newView);
-        return true;
+        return;
       }
       
       // we want to always check for quorum loss but don't act on it
@@ -1220,7 +1258,7 @@ public class GMSJoinLeave implements JoinLeave, MessageHandler {
         boolean prepared = false;
         do {
           if (this.shutdown || Thread.currentThread().isInterrupted()) {
-            return false;
+            return;
           }
           prepared = prepareView(newView, joinReqs);
           if (!prepared && quorumRequired) {
@@ -1230,11 +1268,11 @@ public class GMSJoinLeave implements JoinLeave, MessageHandler {
             } catch (InterruptedException e) {
               // abort the view if interrupted
               shutdown = true;
-              return false;
+              return;
             }
   
             if (!unresponsive.isEmpty()) {
-              List<InternalDistributedMember> failures = new ArrayList<InternalDistributedMember>(currentView.getCrashedMembers().size() + unresponsive.size());
+              List<InternalDistributedMember> failures = new ArrayList<>(currentView.getCrashedMembers().size() + unresponsive.size());
               failures.addAll(unresponsive);
 
               NetView conflictingView = prepareProcessor.getConflictingView();
@@ -1250,7 +1288,9 @@ public class GMSJoinLeave implements JoinLeave, MessageHandler {
               if (failures.size() > 0) {
                 // abort the current view and try again
                 removalReqs.addAll(failures);
-                newView = new NetView(localAddress, newView.getViewId()+1, newView.getMembers(), leaveReqs,
+                List<InternalDistributedMember> newMembers = new ArrayList<>(newView.getMembers());
+                newMembers.removeAll(removalReqs);
+                newView = new NetView(localAddress, newView.getViewId()+1, newMembers, leaveReqs,
                     removalReqs);
               }
             }
@@ -1261,7 +1301,7 @@ public class GMSJoinLeave implements JoinLeave, MessageHandler {
       lastConflictingView = null;
       
       sendView(newView, joinReqs);
-      return true;
+      return;
     }
     
     /**
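
A short annotated sketch of the recipients/responders split added to sendView above: crashed members still receive the view message, but the reply processor now waits only on the live members. The names mirror the diff:

    Set<InternalDistributedMember> recips = new HashSet<>(view.getMembers());
    recips.removeAll(newMembers);    // new members get the view in a JoinResponseMessage
    recips.remove(localAddress);     // no need to send it to ourselves

    Set<InternalDistributedMember> responders = recips;   // only these must acknowledge
    if (!view.getCrashedMembers().isEmpty()) {
      recips = new HashSet<>(recips);             // copy before widening the recipient set
      recips.addAll(view.getCrashedMembers());    // informed, but not waited on
    }
    msg.setRecipients(recips);
    rp.initialize(id, responders);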

http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/01786106/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/messages/PingRequestMessage.java
----------------------------------------------------------------------
diff --git a/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/messages/PingRequestMessage.java b/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/messages/PingRequestMessage.java
index 95c86b6..a38c586 100755
--- a/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/messages/PingRequestMessage.java
+++ b/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/messages/PingRequestMessage.java
@@ -13,13 +13,19 @@ import com.gemstone.gemfire.internal.Version;
 public class PingRequestMessage extends HighPriorityDistributionMessage{
 
   int requestId;
+  InternalDistributedMember target;
   
-  public PingRequestMessage(int id) {
+  public PingRequestMessage(InternalDistributedMember neighbour, int id) {
     requestId = id;
+    this.target = neighbour;
   }
   
   public PingRequestMessage(){}
   
+  public InternalDistributedMember getTarget() {
+    return target;
+  }
+  
   @Override
   public int getDSFID() {
     return PING_REQUEST;
@@ -32,7 +38,7 @@ public class PingRequestMessage extends HighPriorityDistributionMessage{
 
   @Override
   public String toString() {
-    return "PingRequestMessage [requestId=" + requestId + "]";
+    return "PingRequestMessage [requestId=" + requestId + "] from " + getSender();
   }
 
   public int getRequestId() {
@@ -47,10 +53,12 @@ public class PingRequestMessage extends HighPriorityDistributionMessage{
   @Override
   public void toData(DataOutput out) throws IOException {
     out.writeInt(requestId);
+    DataSerializer.writeObject(target, out);
   }
   
   @Override
   public void fromData(DataInput in) throws IOException, ClassNotFoundException {
     requestId = in.readInt();
+    target = DataSerializer.readObject(in);
   }
 }

http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/01786106/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/messages/PingResponseMessage.java
----------------------------------------------------------------------
diff --git a/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/messages/PingResponseMessage.java b/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/messages/PingResponseMessage.java
index 86e154c..ef69e25 100755
--- a/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/messages/PingResponseMessage.java
+++ b/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/messages/PingResponseMessage.java
@@ -34,7 +34,7 @@ public class PingResponseMessage extends HighPriorityDistributionMessage {
  
   @Override
   public String toString() {
-    return "PingResponseMessage [requestId=" + requestId + "]";
+    return "PingResponseMessage [requestId=" + requestId + "] from " + getSender();
   }
 
   @Override

http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/01786106/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/messenger/AddressManager.java
----------------------------------------------------------------------
diff --git a/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/messenger/AddressManager.java b/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/messenger/AddressManager.java
index bfd0aee..a1305d5 100755
--- a/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/messenger/AddressManager.java
+++ b/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/messenger/AddressManager.java
@@ -45,13 +45,13 @@ public class AddressManager extends Protocol {
 
     case Event.FIND_MBRS:
       List<Address> missing = (List<Address>)evt.getArg();
-      logger.debug("AddressManager.FIND_MBRS processing {}", missing);
+//      logger.debug("AddressManager.FIND_MBRS processing {}", missing);
       Responses responses = new Responses(false);
       for (Address laddr: missing) {
         try {
           if (laddr instanceof JGAddress) {
             PingData pd = new PingData(laddr, true, laddr.toString(), newIpAddress(laddr));
-            logger.debug("AddressManager.FIND_MBRS adding response {}", pd);
+//            logger.debug("AddressManager.FIND_MBRS adding response {}", pd);
             responses.addResponse(pd, false);
             updateUDPCache(pd);
           }

http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/01786106/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/messenger/JGroupsMessenger.java
----------------------------------------------------------------------
diff --git a/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/messenger/JGroupsMessenger.java b/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/messenger/JGroupsMessenger.java
index d7f2723..2004598 100755
--- a/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/messenger/JGroupsMessenger.java
+++ b/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/messenger/JGroupsMessenger.java
@@ -403,13 +403,14 @@ public class JGroupsMessenger implements Messenger {
     DMStats theStats = services.getStatistics();
 
     if (!myChannel.isConnected()) {
+      logger.info("JGroupsMessenger channel is closed - messaging is not possible");
       throw new DistributedSystemDisconnectedException("Distributed System is shutting down");
     }
     
     filterOutgoingMessage(msg);
     
-    if (logger.isDebugEnabled()) {
-      logger.debug("JGroupsMessenger sending [{}] recipients: {}", msg, msg.getRecipientsDescription());
+    if (logger.isTraceEnabled()) {
+      logger.trace("JGroupsMessenger sending [{}] recipients: {}", msg, msg.getRecipientsDescription());
     }
     
     InternalDistributedMember[] destinations = msg.getRecipients();
@@ -660,7 +661,7 @@ public class JGroupsMessenger implements Messenger {
       
       services.getStatistics().endMsgDeserialization(start);
 
-      logger.debug("JGroupsReceiver deserialized {}", result);
+      logger.trace("JGroupsReceiver deserialized {}", result);
 
     }
     catch (ClassNotFoundException | IOException | RuntimeException e) {
@@ -769,8 +770,8 @@ public class JGroupsMessenger implements Messenger {
         return;
       }
 
-      if (logger.isDebugEnabled()) {
-        logger.debug("JGroupsMessenger received {} headers: {}", jgmsg, jgmsg.getHeaders());
+      if (logger.isTraceEnabled()) {
+        logger.trace("JGroupsMessenger received {} headers: {}", jgmsg, jgmsg.getHeaders());
       }
       
       Object o = readJGMessage(jgmsg);

http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/01786106/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/mgr/GMSMembershipManager.java
----------------------------------------------------------------------
diff --git a/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/mgr/GMSMembershipManager.java b/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/mgr/GMSMembershipManager.java
index 9c4d061..1cc079e 100755
--- a/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/mgr/GMSMembershipManager.java
+++ b/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/mgr/GMSMembershipManager.java
@@ -1685,12 +1685,6 @@ public class GMSMembershipManager implements MembershipManager, Manager
       directChannel.emergencyClose();
     }
     
-    services.getJoinLeave().emergencyClose();
-    services.getMessenger().emergencyClose();
-    services.getHealthMonitor().emergencyClose();
-    services.getAuthenticator().emergencyClose();
-
-    
     // could we guarantee not to allocate objects?  We're using Darrel's 
     // factory, so it's possible that an unsafe implementation could be
     // introduced here.
@@ -1765,6 +1759,10 @@ public class GMSMembershipManager implements MembershipManager, Manager
   public void uncleanShutdown(String reason, final Exception e) {
     inhibitForcedDisconnectLogging(false);
     
+    if (this.shutdownCause == null) {
+      this.shutdownCause = e;
+    }
+    
     if (this.directChannel != null) {
       this.directChannel.disconnect(e);
     }
@@ -2916,6 +2914,9 @@ public class GMSMembershipManager implements MembershipManager, Manager
         logger.fatal(LocalizedMessage.create(
             LocalizedStrings.GroupMembershipService_MEMBERSHIP_SERVICE_FAILURE_0, reason), shutdownCause);
       }
+      
+      services.emergencyClose();
+
       // stop server locators immediately since they may not have correct
       // information.  This has caused client failures in bridge/wan
       // network-down testing
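
Tying the two emergencyClose changes together: the membership manager no longer closes each service itself; it delegates to Services.emergencyClose(), which also cancels the shared cancel criterion. A condensed sketch, with the ordering taken from the Services diff above:

    // in GMSMembershipManager, on membership failure:
    services.emergencyClose();

    // which, per Services.emergencyClose() above, now does roughly:
    joinLeave.emergencyClose();
    healthMon.emergencyClose();
    auth.emergencyClose();
    messenger.emergencyClose();
    manager.emergencyClose();
    timer.cancel();
    cancelCriterion.cancel("Membership services are shut down");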

http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/01786106/gemfire-core/src/test/java/com/gemstone/gemfire/distributed/LocatorDUnitTest.java
----------------------------------------------------------------------
diff --git a/gemfire-core/src/test/java/com/gemstone/gemfire/distributed/LocatorDUnitTest.java b/gemfire-core/src/test/java/com/gemstone/gemfire/distributed/LocatorDUnitTest.java
index 7f1a922..0145efb 100644
--- a/gemfire-core/src/test/java/com/gemstone/gemfire/distributed/LocatorDUnitTest.java
+++ b/gemfire-core/src/test/java/com/gemstone/gemfire/distributed/LocatorDUnitTest.java
@@ -33,6 +33,7 @@ import com.gemstone.gemfire.distributed.internal.membership.MembershipManager;
 import com.gemstone.gemfire.distributed.internal.membership.MembershipTestHook;
 import com.gemstone.gemfire.distributed.internal.membership.NetView;
 import com.gemstone.gemfire.distributed.internal.membership.gms.MembershipManagerHelper;
+import com.gemstone.gemfire.internal.Assert;
 import com.gemstone.gemfire.internal.AvailablePort;
 import com.gemstone.gemfire.internal.AvailablePortHelper;
 import com.gemstone.gemfire.internal.logging.InternalLogWriter;
@@ -54,6 +55,8 @@ import dunit.VM;
  */
 public class LocatorDUnitTest extends DistributedTestCase {
 
+  static TestHook hook;
+  
   /**
    * Creates a new <code>LocatorDUnitTest</code>
    */
@@ -225,8 +228,7 @@ public class LocatorDUnitTest extends DistributedTestCase {
    * Bug 30341 concerns race conditions in JGroups that allow two locators to start up in a
    * split-brain configuration.  To work around this we have always told customers that they
    * need to stagger the starting of locators.  This test configures two locators to start up
-   * simultaneously and shows that they will refuse to boot if they do not find one another
-   * and form a single distributed system.
+   * simultaneously and shows that they find each other and form a single system.
    * 
    * @throws Exception
    */
@@ -257,39 +259,31 @@ public class LocatorDUnitTest extends DistributedTestCase {
     SerializableCallable startLocator1 = new SerializableCallable("start locator1") {
       @Override
       public Object call() throws Exception {
-//        boolean setting = GemFireTracer.DEBUG;
         try {
           System.setProperty("p2p.joinTimeout", "5000"); // set a short join timeout.  default is 17000ms
-          fail("ignore required response must be implemented for the jgroups replacement");
-//          PingWaiter.TEST_HOOK_IGNORE_REQUIRED_RESPONSE = true;
-          Locator myLocator = Locator.startLocatorAndDS(port1, new File("testBug30341Locator1.log"), properties);
-          myLocator.stop();
+          Locator myLocator = Locator.startLocatorAndDS(port1, new File(""), properties);
         } catch (SystemConnectException e) {
-          return Boolean.TRUE;
+          return Boolean.FALSE;
         } catch (GemFireConfigException e) {
-          return Boolean.TRUE;
+          return Boolean.FALSE;
         } finally {
-//          PingWaiter.TEST_HOOK_IGNORE_REQUIRED_RESPONSE = false;
           System.getProperties().remove("p2p.joinTimeout");
         }
-        return Boolean.FALSE;
+        return Boolean.TRUE;
       }
     };
     SerializableCallable startLocator2 = new SerializableCallable("start locator2") {
       @Override
       public Object call() throws Exception {
-//        boolean setting = GemFireTracer.DEBUG;
         try {
           System.setProperty("p2p.joinTimeout", "5000"); // set a short join timeout.  default is 17000ms
-//          GemFireTracer.DEBUG = true;
-          Locator myLocator = Locator.startLocatorAndDS(port2, new File("testBug30341Locator2.log"), properties);
+          Locator myLocator = Locator.startLocatorAndDS(port2, new File(""), properties);
         } catch (SystemConnectException e) {
-          return Boolean.TRUE;
+          return Boolean.FALSE;
         } finally {
           System.getProperties().remove("p2p.joinTimeout");
-//          GemFireTracer.DEBUG = setting;
         }
-        return Boolean.FALSE;
+        return Boolean.TRUE;
       }
     };
     AsyncInvocation async1 = null;
@@ -315,23 +309,34 @@ public class LocatorDUnitTest extends DistributedTestCase {
             if (result2 instanceof Exception) {
               throw (Exception)result2;
             }
-            if (!(Boolean)result1) {
-              fail("locator1 started but shouldn't have");
-            }
-            // note: locator2 will sometimes fail to start as well because it
-            // is still able to talk to locator1's gossip server during its
-            // last attempt to start up
+            // verify that they found each other
+            SerializableCallable verify = new SerializableCallable("verify no split-brain") {
+              public Object call() {
+                InternalDistributedSystem sys = InternalDistributedSystem.getAnyInstance();
+                if (sys == null) {
+                  fail("no distributed system found");
+                }
+                Assert.assertTrue(sys.getDM().getViewMembers().size() == 2,
+                    "expected 2 members but found " + sys.getDM().getViewMembers().size()
+                    );
+                return true;
+              }
+            };
+            loc2.invoke(verify);
+            loc1.invoke(verify);
           }
         }
       } finally {
-        loc2.invoke(new SerializableRunnable("stop locator") {
+        SerializableRunnable r = new SerializableRunnable("stop locator") {
           public void run() {
             Locator loc = Locator.getLocator();
             if (loc != null) {
               loc.stop();
             }
           }
-        });
+        };
+        loc2.invoke(r);
+        loc1.invoke(r);
       }
     }
     
@@ -355,7 +360,7 @@ public class LocatorDUnitTest extends DistributedTestCase {
     properties.put("enable-network-partition-detection", "true");
     properties.put("disable-auto-reconnect", "true");
     
-    File logFile = new File(getUniqueName() + "-locator.log");
+    File logFile = new File("");
     if (logFile.exists()) {
       logFile.delete();
     }
@@ -379,7 +384,6 @@ public class LocatorDUnitTest extends DistributedTestCase {
     
     // connect three vms and then watch the lead member selection as they
     // are disconnected/reconnected
-    properties.put("log-file", "testLeadMemberSelection_vm1.log");
     properties.put("name", "vm1");
     DistributedMember mem1 = (DistributedMember)vm1.invoke(this.getClass(),
         "getDistributedMember", connectArgs);
@@ -387,13 +391,11 @@ public class LocatorDUnitTest extends DistributedTestCase {
 //    assertTrue(MembershipManagerHelper.getLeadMember(sys) != null);
     assertLeadMember(mem1, sys, 5000);
     
-    properties.put("log-file", "testLeadMemberSelection_vm2.log");
     properties.put("name", "vm2");
     DistributedMember mem2 = (DistributedMember)vm2.invoke(this.getClass(),
         "getDistributedMember", connectArgs);
     assertLeadMember(mem1, sys, 5000);
     
-    properties.put("log-file", "testLeadMemberSelection_vm3.log");
     properties.put("name", "vm3");
     DistributedMember mem3 = (DistributedMember)vm3.invoke(this.getClass(),
         "getDistributedMember", connectArgs);
@@ -404,7 +406,6 @@ public class LocatorDUnitTest extends DistributedTestCase {
     MembershipManagerHelper.getMembershipManager(sys).waitForDeparture(mem1);
     assertLeadMember(mem2, sys, 5000);
     
-    properties.put("log-file", "testLeadMemberSelection_vm1.log");
     properties.put("name", "vm1");
     mem1 = (DistributedMember)vm1.invoke(this.getClass(),
         "getDistributedMember", connectArgs);
@@ -484,18 +485,19 @@ public class LocatorDUnitTest extends DistributedTestCase {
     properties.put("enable-network-partition-detection", "true");
     properties.put("disable-auto-reconnect", "true");
     properties.put("member-timeout", "2000");
+//    properties.put("log-level", "fine");
     properties.put(DistributionConfig.ENABLE_CLUSTER_CONFIGURATION_NAME, "false");
     
     try {
       final String uname = getUniqueName();
-      File logFile = new File(uname + "-locator1-" + port1 + ".log");
+      File logFile = new File("");
       locator = Locator.startLocatorAndDS(port1, logFile, properties);
       final DistributedSystem sys = locator.getDistributedSystem();
       sys.getLogWriter().info("<ExpectedException action=add>java.net.ConnectException</ExpectedException>");
       MembershipManagerHelper.inhibitForcedDisconnectLogging(true);
       locvm.invoke(new SerializableRunnable() {
         public void run() {
-          File lf = new File(uname + "-locator2-" + port2 + ".log");
+          File lf = new File("");
           try {
             Locator.startLocatorAndDS(port2, lf, properties);
           }
@@ -618,12 +620,12 @@ public class LocatorDUnitTest extends DistributedTestCase {
     
     try {
       final String uname = getUniqueName();
-      File logFile = new File(uname + "-locator1-" + port1 + ".log");
+      File logFile = new File("");
       locator = Locator.startLocatorAndDS(port1, logFile, properties);
       final DistributedSystem sys = locator.getDistributedSystem();
       locvm3.invoke(new SerializableRunnable() {
         public void run() {
-          File lf = new File(uname + "-locator2-" + port2 + ".log");
+          File lf = new File("");
           try {
             Locator.startLocatorAndDS(port2, lf, properties);
           }
@@ -768,12 +770,12 @@ public class LocatorDUnitTest extends DistributedTestCase {
     
     try {
       final String uname = getUniqueName();
-      File logFile = new File(uname + "-locator1-" + port1 + ".log");
+      File logFile = new File("");
       locator = Locator.startLocatorAndDS(port1, logFile, properties);
       DistributedSystem sys = locator.getDistributedSystem();
       locvm.invoke(new SerializableRunnable() {
         public void run() {
-          File lf = new File(uname + "-locator2-" + port2 + ".log");
+          File lf = new File("");
           try {
             Locator.startLocatorAndDS(port2, lf, properties);
             MembershipManagerHelper.inhibitForcedDisconnectLogging(true);
@@ -909,18 +911,19 @@ public class LocatorDUnitTest extends DistributedTestCase {
     properties.put("enable-network-partition-detection", "true");
     properties.put("disable-auto-reconnect", "true");
     properties.put("member-timeout", "2000");
+//    properties.put("log-level", "fine");
     properties.put(DistributionConfig.ENABLE_CLUSTER_CONFIGURATION_NAME, "false");
 
     SerializableRunnable stopLocator = getStopLocatorRunnable();
     
     try {
       final String uname = getUniqueName();
-      File logFile = new File(uname + "-locator1-" + port1 + ".log");
+      File logFile = new File("");
       locator = Locator.startLocatorAndDS(port1, logFile, properties);
       DistributedSystem sys = locator.getDistributedSystem();
       locvm.invoke(new SerializableRunnable() {
         public void run() {
-          File lf = new File(uname + "-locator2-" + port2 + ".log");
+          File lf = new File("");
           try {
             Locator.startLocatorAndDS(port2, lf, properties);
           }
@@ -932,7 +935,6 @@ public class LocatorDUnitTest extends DistributedTestCase {
       
       Object[] connectArgs = new Object[]{ properties };
       
-      MembershipTestHook testHook;
       SerializableRunnable crashSystem =
         new SerializableRunnable("Crash system") {
           public void run() {
@@ -941,7 +943,13 @@ public class LocatorDUnitTest extends DistributedTestCase {
             msys.getLogWriter().info("<ExpectedException action=add>com.gemstone.gemfire.ConnectException</ExpectedException>");
             msys.getLogWriter().info("<ExpectedException action=add>com.gemstone.gemfire.ForcedDisconnectException</ExpectedException>");
             msys.getLogWriter().info("<ExpectedException action=add>Possible loss of quorum</ExpectedException>");
-            MembershipManagerHelper.crashDistributedSystem(msys);
+            hook = new TestHook();
+            MembershipManagerHelper.getMembershipManager(msys).registerTestHook(hook);
+            try {
+              MembershipManagerHelper.crashDistributedSystem(msys);
+            } finally {
+              hook.reset();
+            }
           }
       };
 
@@ -988,32 +996,25 @@ public class LocatorDUnitTest extends DistributedTestCase {
             msys.getLogWriter().info("<ExpectedException action=add>Membership: requesting removal of </ExpectedException>");
             mmgr.requestMemberRemoval(mem1, "test reasons");
             msys.getLogWriter().info("<ExpectedException action=remove>Membership: requesting removal of </ExpectedException>");
-            
             fail("It should have thrown exception in requestMemberRemoval");
           } catch (DistributedSystemDisconnectedException e) {
             Throwable cause = e.getCause();
             assertTrue(
                 "This should have been ForceDisconnectException but found "
                     + cause, cause instanceof ForcedDisconnectException);
+          } finally {
+            hook.reset();
           }
         }
       });
 
-      locvm.invoke(stopLocator);
-      // stop the locator normally.  This should also be okay
-      locator.stop();
-      
-      if (!Locator.getLocators().isEmpty()) {
-        // log this for debugging purposes before throwing assertion error
-        getLogWriter().warning("found locator " + Locator.getLocators().iterator().next());
-      }
-      assertTrue("locator is not stopped", Locator.getLocators().isEmpty());
     }
     finally {
       if (locator != null) {
         locator.stop();
       }
       locvm.invoke(stopLocator);
+      assertTrue("locator is not stopped", Locator.getLocators().isEmpty());
     }
   }
 
@@ -1071,7 +1072,7 @@ public class LocatorDUnitTest extends DistributedTestCase {
       final String uname = getUniqueName();
       locvm.invoke(new SerializableRunnable() {
         public void run() {
-          File lf = new File(uname + "-locator1-" + port1 + ".log");
+          File lf = new File("");
           try {
             Locator.startLocatorAndDS(port2, lf, properties);
           }
@@ -1081,7 +1082,7 @@ public class LocatorDUnitTest extends DistributedTestCase {
         }
       });
 
-      File logFile = new File(uname + "-locator2-" + port2 + ".log");
+      File logFile = new File("");
       locator = Locator.startLocatorAndDS(port1, logFile, properties);
       DistributedSystem sys = locator.getDistributedSystem();
       sys.getLogWriter().info("<ExpectedException action=add>com.gemstone.gemfire.ForcedDisconnectException</ExpectedException>");
@@ -1223,8 +1224,7 @@ public class LocatorDUnitTest extends DistributedTestCase {
     
     vm0.invoke(new SerializableRunnable("Start locator " + locators) {
         public void run() {
-          File logFile = new File(uniqueName + "-locator" + port
-                                  + ".log");
+          File logFile = new File("");
           try {
             Properties locProps = new Properties();
             locProps.setProperty("mcast-port", "0");
@@ -1466,8 +1466,7 @@ public class LocatorDUnitTest extends DistributedTestCase {
 
     vm0.invoke(new SerializableRunnable("Start locator on " + port1) {
       public void run() {
-        File logFile = new File(uniqueName + "-locator" + port1
-            + ".log");
+        File logFile = new File("");
         try {
           Locator.startLocatorAndDS(port1, logFile, dsProps);
         } catch (IOException ex) {
@@ -1481,8 +1480,7 @@ public class LocatorDUnitTest extends DistributedTestCase {
 
       vm3.invoke(new SerializableRunnable("Start locator on " + port2) {
         public void run() {
-          File logFile = new File(uniqueName + "-locator" +
-                                  port2 + ".log");
+          File logFile = new File("");
           try {
             Locator.startLocatorAndDS(port2, logFile, dsProps);
 
@@ -1579,8 +1577,7 @@ public class LocatorDUnitTest extends DistributedTestCase {
     
     vm0.invoke(new SerializableRunnable("Start locator on " + port1) {
         public void run() {
-          File logFile = new File(uniqueName + "-locator" + port1
-                                  + ".log");
+          File logFile = new File("");
           try {
             Properties props = new Properties();
             props.setProperty("mcast-port", String.valueOf(mcastport));
@@ -1597,8 +1594,7 @@ public class LocatorDUnitTest extends DistributedTestCase {
       });
     vm3.invoke(new SerializableRunnable("Start locator on " + port2) {
         public void run() {
-          File logFile = new File(uniqueName + "-locator" +
-                                  port2 + ".log");
+          File logFile = new File("");
           try {
             Properties props = new Properties();
             props.setProperty("mcast-port", String.valueOf(mcastport));
@@ -1682,7 +1678,7 @@ public class LocatorDUnitTest extends DistributedTestCase {
     port1 =
       AvailablePort.getRandomAvailablePort(AvailablePort.SOCKET);
     deleteLocatorStateFile(port1);
-    File logFile = new File(getUniqueName() + "-locator" + port1 + ".log");
+    File logFile = new File("");
     Locator locator = Locator.startLocator(port1, logFile);
     try {
 
@@ -1716,21 +1712,21 @@ public class LocatorDUnitTest extends DistributedTestCase {
       final Properties properties = new Properties();
       properties.put("mcast-port", "0");
       properties.put("locators", locators);
-      properties.put("enable-network-partition-detection", "true");
+      properties.put(DistributionConfig.ENABLE_NETWORK_PARTITION_DETECTION_NAME, "true");
       properties.put("disable-auto-reconnect", "true");
       properties.put(DistributionConfig.ENABLE_CLUSTER_CONFIGURATION_NAME, "false");
-      File logFile = new File(getUniqueName() + "-locatorA-" + port1 + ".log");
+      File logFile = new File("");
       locator = Locator.startLocatorAndDS(port1, logFile, properties);
   
       final Properties properties2 = new Properties();
       properties2.put("mcast-port", "0");
       properties2.put("locators", locators);
-      properties2.put("enable-network-partition-detection", "false");
+      properties2.put(DistributionConfig.ENABLE_NETWORK_PARTITION_DETECTION_NAME, "false");
       properties2.put(DistributionConfig.ENABLE_CLUSTER_CONFIGURATION_NAME, "false");
 
       properties.put("disable-auto-reconnect", "true");
       
-      vm1.invoke(new SerializableRunnable("try, but fail to connect") {
+      vm1.invoke(new SerializableRunnable("try to connect") {
         public void run() {
           DistributedSystem s = null;
           try {
@@ -1750,20 +1746,18 @@ public class LocatorDUnitTest extends DistributedTestCase {
       locator.stop();
       
       // now start the locator with enable-network-partition-detection=false
-      logFile = new File(getUniqueName() + "-locatorB-" + port1 + ".log");
+      logFile = new File("");
       locator = Locator.startLocatorAndDS(port1, logFile , properties2);
   
-      vm1.invoke(new SerializableRunnable("try, but fail to connect(2)") {
+      vm1.invoke(new SerializableRunnable("try to connect") {
         public void run() {
           DistributedSystem s = null;
-          try {
-            s = DistributedSystem.connect(properties);
-            s.disconnect();
+          s = DistributedSystem.connect(properties);
+          boolean enabled = ((InternalDistributedSystem)s).getConfig().getEnableNetworkPartitionDetection();
+          s.disconnect();
+          if (enabled) {
             fail("should not have been able to connect with different enable-network-partition-detection settings");
           }
-          catch (GemFireConfigException e) {
-            // passed
-          }
         }
       });
       
@@ -1788,14 +1782,14 @@ public class LocatorDUnitTest extends DistributedTestCase {
 
     port1 =
       AvailablePort.getRandomAvailablePort(AvailablePort.SOCKET);
-    File logFile1 = new File(getUniqueName() + "-locator" + port1 + ".log");
+    File logFile1 = new File("");
     Locator locator1 = Locator.startLocator(port1, logFile1);
     
     try {
 
     int port2 =
       AvailablePort.getRandomAvailablePort(AvailablePort.SOCKET);
-    File logFile2 = new File(getUniqueName() + "-locator" + port2 + ".log");
+    File logFile2 = new File("");
 
     deleteLocatorStateFile(port1, port2);
     
@@ -1850,7 +1844,7 @@ public class LocatorDUnitTest extends DistributedTestCase {
     port1 =
       AvailablePort.getRandomAvailablePort(AvailablePort.SOCKET);
     deleteLocatorStateFile(port1);
-    File logFile = new File(getUniqueName() + "-locator" + port1 + ".log");
+    File logFile = new File("");
     File stateFile = new File("locator"+port1+"state.dat");
     VM vm0 = Host.getHost(0).getVM(0);
     final Properties p = new Properties();
@@ -1928,8 +1922,7 @@ public class LocatorDUnitTest extends DistributedTestCase {
   private SerializableRunnable getStartSBLocatorRunnable(final int port, final String name) {
     return new SerializableRunnable("Start locator on port " + port) {
       public void run() {
-        File logFile = new File(name + "-locator" + port
-                                + ".log");
+        File logFile = new File("");
         try {
           System.setProperty("gemfire.disable-floating-coordinator", "true");
           System.setProperty("p2p.joinTimeout", "1000");
@@ -1962,6 +1955,31 @@ public class LocatorDUnitTest extends DistributedTestCase {
   }
 
   
+  //New test hook which blocks before closing channel.
+  class TestHook implements MembershipTestHook {
+
+    volatile boolean unboundedWait = true;
+    @Override
+    public void beforeMembershipFailure(String reason, Throwable cause) {
+      System.out.println("Inside TestHook.beforeMembershipFailure with " + cause);
+      long giveUp = System.currentTimeMillis() + 30000;
+      if (cause instanceof ForcedDisconnectException) {
+        while (unboundedWait && System.currentTimeMillis() < giveUp) {
+          pause(1000);
+        }
+      }
+      System.out.println("TestHook exiting");
+    }
+
+    @Override
+    public void afterMembershipFailure(String reason, Throwable cause) {
+    }
+
+    public void reset() {
+      unboundedWait = false;
+    }
+
+  }
   class MyMembershipListener implements MembershipListener {
     boolean quorumLostInvoked;
     

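The notable piece of the LocatorDUnitTest changes is the new TestHook: the crash runnable registers it with the membership manager before calling crashDistributedSystem(), and the hook is reset in finally blocks both there and after the requestMemberRemoval check. When the failure cause is a ForcedDisconnectException the hook stalls beforeMembershipFailure for up to 30 seconds, which appears intended to keep the disconnect in flight long enough for the removal request to fail with a DistributedSystemDisconnectedException wrapping the ForcedDisconnectException, as the test now asserts. A minimal, self-contained sketch of that gate-and-release pattern (plain Java; the class and method names below are illustrative only, not Geode APIs):

    // Sketch of the gate-and-release idea behind the new TestHook.
    public class BlockingFailureGate {

      private volatile boolean blocked = true;

      // Invoked from the failure path: park until the test releases the gate,
      // or give up after a 30 second safety timeout so a broken test cannot hang.
      public void onMembershipFailure(Throwable cause) {
        long giveUp = System.currentTimeMillis() + 30_000L;
        while (blocked && System.currentTimeMillis() < giveUp) {
          try {
            Thread.sleep(1_000L);   // poll once a second, as the test hook does
          } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            return;
          }
        }
      }

      // Invoked by the test, typically from a finally block, to let the failure proceed.
      public void release() {
        blocked = false;
      }
    }
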
http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/01786106/gemfire-core/src/test/java/com/gemstone/gemfire/distributed/internal/membership/gms/membership/GMSHealthMonitorJUnitTest.java
----------------------------------------------------------------------
diff --git a/gemfire-core/src/test/java/com/gemstone/gemfire/distributed/internal/membership/gms/membership/GMSHealthMonitorJUnitTest.java b/gemfire-core/src/test/java/com/gemstone/gemfire/distributed/internal/membership/gms/membership/GMSHealthMonitorJUnitTest.java
index 3d05b89..e7962a3 100644
--- a/gemfire-core/src/test/java/com/gemstone/gemfire/distributed/internal/membership/gms/membership/GMSHealthMonitorJUnitTest.java
+++ b/gemfire-core/src/test/java/com/gemstone/gemfire/distributed/internal/membership/gms/membership/GMSHealthMonitorJUnitTest.java
@@ -30,6 +30,7 @@ import com.gemstone.gemfire.distributed.internal.membership.gms.messages.PingRes
 import com.gemstone.gemfire.distributed.internal.membership.gms.messages.RemoveMemberMessage;
 import com.gemstone.gemfire.distributed.internal.membership.gms.messages.SuspectMembersMessage;
 import com.gemstone.gemfire.distributed.internal.membership.gms.messages.SuspectRequest;
+import com.gemstone.gemfire.internal.SocketCreator;
 import com.gemstone.gemfire.test.junit.categories.UnitTest;
 
 @Category(UnitTest.class)
@@ -83,9 +84,11 @@ public class GMSHealthMonitorJUnitTest {
   public void testHMServiceStarted() throws IOException {
 
     MethodExecuted messageSent = new MethodExecuted();
+    InternalDistributedMember mbr = new InternalDistributedMember(SocketCreator.getLocalHost(), 12345);
+    when(messenger.getMemberID()).thenReturn(mbr);
     when(messenger.send(any(PingResponseMessage.class))).thenAnswer(messageSent);
 
-    gmsHealthMonitor.processMessage(new PingRequestMessage(1));
+    gmsHealthMonitor.processMessage(new PingRequestMessage(mbr, 1));
     Assert.assertTrue("Ping Response should have been sent", messageSent.isMethodExecuted());
   }
 

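The GMSHealthMonitorJUnitTest fix tracks a signature change: PingRequestMessage now carries the member being pinged, and the test stubs messenger.getMemberID(), apparently so the monitor can check that the request is addressed to the local member before replying. A hedged, generic sketch of that stubbing pattern with Mockito (the Messenger interface and member IDs below are stand-ins, not the real Geode types):

    import static org.mockito.Mockito.*;

    public class PingTargetStubExample {

      // Illustrative stand-in for the real messenger; not a Geode interface.
      interface Messenger {
        String getMemberID();
      }

      public static void main(String[] args) {
        Messenger messenger = mock(Messenger.class);
        // Stub the local member's identity, as the updated unit test does.
        when(messenger.getMemberID()).thenReturn("member-12345");

        String pingTarget = "member-12345";
        // Health-monitor-style check: only respond if the ping targets this member.
        boolean respond = pingTarget.equals(messenger.getMemberID());
        System.out.println("send ping response? " + respond);  // prints: true
      }
    }
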
http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/01786106/gemfire-core/src/test/java/dunit/DistributedTestCase.java
----------------------------------------------------------------------
diff --git a/gemfire-core/src/test/java/dunit/DistributedTestCase.java b/gemfire-core/src/test/java/dunit/DistributedTestCase.java
index 56eb825..058980f 100755
--- a/gemfire-core/src/test/java/dunit/DistributedTestCase.java
+++ b/gemfire-core/src/test/java/dunit/DistributedTestCase.java
@@ -425,7 +425,7 @@ public abstract class DistributedTestCase extends TestCase implements java.io.Se
     if (!p.contains(DistributionConfig.DISABLE_AUTO_RECONNECT_NAME)) {
       p.put(DistributionConfig.DISABLE_AUTO_RECONNECT_NAME, "true");
     }
-        
+
     for (Iterator iter = props.entrySet().iterator();
     iter.hasNext(); ) {
       Map.Entry entry = (Map.Entry) iter.next();
@@ -1349,7 +1349,7 @@ public abstract class DistributedTestCase extends TestCase implements java.io.Se
    */
   public void deleteLocatorStateFile(int... ports) {
     for (int i=0; i<ports.length; i++) {
-      File stateFile = new File("locator"+ports[i]+"state.dat");
+      File stateFile = new File("locator"+ports[i]+"view.dat");
       if (stateFile.exists()) {
         stateFile.delete();
       }