You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@geode.apache.org by bs...@apache.org on 2016/03/09 23:32:41 UTC

incubator-geode git commit: GEODE-952: Sometimes many unit tests fail because the locator was ForcedDisconnect

Repository: incubator-geode
Updated Branches:
  refs/heads/develop 70ca9214f -> 445efdb9e


GEODE-952: Sometimes many unit tests fail because the locator was ForcedDisconnect

We're seeing a number of similar failures that all seem to be caused by JVMs
pausing and being kicked out of the distributed system. When running under the
DUnit test launcher, this change-set enables creation of a heap dump if a member
is forced out of the system and JVM pauses have been detected. This will give us
artifacts that we can analyze to help determine what's going on.


Project: http://git-wip-us.apache.org/repos/asf/incubator-geode/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-geode/commit/445efdb9
Tree: http://git-wip-us.apache.org/repos/asf/incubator-geode/tree/445efdb9
Diff: http://git-wip-us.apache.org/repos/asf/incubator-geode/diff/445efdb9

Branch: refs/heads/develop
Commit: 445efdb9e80326d4de4cc6b71e32dd80e04543b8
Parents: 70ca921
Author: Bruce Schuchardt <bs...@pivotal.io>
Authored: Wed Mar 9 11:36:13 2016 -0800
Committer: Bruce Schuchardt <bs...@pivotal.io>
Committed: Wed Mar 9 14:31:30 2016 -0800

----------------------------------------------------------------------
 .../internal/InternalDistributedSystem.java     | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/445efdb9/geode-core/src/main/java/com/gemstone/gemfire/distributed/internal/InternalDistributedSystem.java
----------------------------------------------------------------------
diff --git a/geode-core/src/main/java/com/gemstone/gemfire/distributed/internal/InternalDistributedSystem.java b/geode-core/src/main/java/com/gemstone/gemfire/distributed/internal/InternalDistributedSystem.java
index a193699..92cb9f8 100644
--- a/geode-core/src/main/java/com/gemstone/gemfire/distributed/internal/InternalDistributedSystem.java
+++ b/geode-core/src/main/java/com/gemstone/gemfire/distributed/internal/InternalDistributedSystem.java
@@ -21,6 +21,7 @@ import java.io.File;
 import java.io.IOException;
 import java.io.Reader;
 import java.lang.reflect.Array;
+import java.lang.reflect.Method;
 import java.net.InetAddress;
 import java.util.ArrayList;
 import java.util.Date;
@@ -76,6 +77,7 @@ import com.gemstone.gemfire.internal.InternalDataSerializer;
 import com.gemstone.gemfire.internal.InternalInstantiator;
 import com.gemstone.gemfire.internal.LinuxProcFsStatistics;
 import com.gemstone.gemfire.internal.LocalStatisticsImpl;
+import com.gemstone.gemfire.internal.OSProcess;
 import com.gemstone.gemfire.internal.OsStatisticsFactory;
 import com.gemstone.gemfire.internal.SocketCreator;
 import com.gemstone.gemfire.internal.StatisticsImpl;
@@ -83,8 +85,8 @@ import com.gemstone.gemfire.internal.StatisticsManager;
 import com.gemstone.gemfire.internal.StatisticsTypeFactoryImpl;
 import com.gemstone.gemfire.internal.SystemTimer;
 import com.gemstone.gemfire.internal.admin.remote.DistributionLocatorId;
-import com.gemstone.gemfire.internal.cache.CacheServerImpl;
 import com.gemstone.gemfire.internal.cache.CacheConfig;
+import com.gemstone.gemfire.internal.cache.CacheServerImpl;
 import com.gemstone.gemfire.internal.cache.EventID;
 import com.gemstone.gemfire.internal.cache.GemFireCacheImpl;
 import com.gemstone.gemfire.internal.cache.execute.FunctionServiceStats;
@@ -944,6 +946,22 @@ public class InternalDistributedSystem
     if (isForcedDisconnect) {
       this.forcedDisconnect = true;
       resetReconnectAttemptCounter();
+      if (sampler.isSamplingEnabled()) {
+        if (sampler.getStatSamplerStats().getJvmPauses() > 0) {
+          try {
+            // if running tests then create a heap dump
+            Class.forName("com.gemstone.gemfire.test.dunit.standalone.DUnitLauncher");
+            Class<?> jmapClass = Class.forName("sun.tools.jmap.JMap");
+            logger.info("This member of the distributed system has been forced to disconnect.  JVM pauses have been detected - dumping heap");
+            String pid = String.valueOf(OSProcess.getId());
+            String fileName = "java"+pid+".hprof";
+            Object parameters = new String[]{"-dump:format=b,file="+fileName, pid};
+            Method main = jmapClass.getDeclaredMethod("main", String[].class);
+            main.invoke(null, parameters);
+          } catch (Exception e) {
+          }
+        }
+      }
     
      reconnected = tryReconnect(true, reason, GemFireCacheImpl.getInstance());
     }