You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@geode.apache.org by "Dale Emery (JIRA)" <ji...@apache.org> on 2019/02/26 19:44:00 UTC

[jira] [Comment Edited] (GEODE-5676) ClusterConfigLocatorRestartDUnitTest hung in CI

    [ https://issues.apache.org/jira/browse/GEODE-5676?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16778491#comment-16778491 ] 

Dale Emery edited comment on GEODE-5676 at 2/26/19 7:43 PM:
------------------------------------------------------------

Similar failure today in JDK8 CI:
{noformat}
"ReconnectThread" #166 prio=5 os_prio=0 tid=0x00007f53bcaff800 nid=0x5c7 in Object.wait() [0x00007f53998dc000]
   java.lang.Thread.State: TIMED_WAITING (on object monitor)
	at java.lang.Object.wait(Native Method)
	at org.apache.geode.distributed.internal.membership.gms.membership.GMSJoinLeave.waitForJoinResponse(GMSJoinLeave.java:475)
	- locked <0x00000000f67c8698> (a [Lorg.apache.geode.distributed.internal.membership.gms.messages.JoinResponseMessage;)
	at org.apache.geode.distributed.internal.membership.gms.membership.GMSJoinLeave.attemptToJoin(GMSJoinLeave.java:437)
	at org.apache.geode.distributed.internal.membership.gms.membership.GMSJoinLeave.join(GMSJoinLeave.java:344)
	at org.apache.geode.distributed.internal.membership.gms.mgr.GMSMembershipManager.join(GMSMembershipManager.java:659)
	at org.apache.geode.distributed.internal.membership.gms.mgr.GMSMembershipManager.joinDistributedSystem(GMSMembershipManager.java:742)
	at org.apache.geode.distributed.internal.membership.gms.Services.start(Services.java:176)
	at org.apache.geode.distributed.internal.membership.gms.GMSMemberFactory.newMembershipManager(GMSMemberFactory.java:106)
	at org.apache.geode.distributed.internal.membership.MemberFactory.newMembershipManager(MemberFactory.java:93)
	at org.apache.geode.distributed.internal.ClusterDistributionManager.<init>(ClusterDistributionManager.java:782)
	at org.apache.geode.distributed.internal.ClusterDistributionManager.<init>(ClusterDistributionManager.java:900)
	at org.apache.geode.distributed.internal.ClusterDistributionManager.create(ClusterDistributionManager.java:541)
	at org.apache.geode.distributed.internal.InternalDistributedSystem.initialize(InternalDistributedSystem.java:830)
	at org.apache.geode.distributed.internal.InternalDistributedSystem.newInstance(InternalDistributedSystem.java:442)
	at org.apache.geode.distributed.internal.InternalDistributedSystem.newInstance(InternalDistributedSystem.java:428)
	at org.apache.geode.distributed.internal.InternalDistributedSystem.connectInternal(InternalDistributedSystem.java:253)
	- locked <0x00000000e02e7280> (a java.lang.Object)
	at org.apache.geode.distributed.DistributedSystem.connect(DistributedSystem.java:164)
	at org.apache.geode.distributed.internal.InternalDistributedSystem.reconnect(InternalDistributedSystem.java:2636)
	at org.apache.geode.distributed.internal.InternalDistributedSystem.tryReconnect(InternalDistributedSystem.java:2463)
	- locked <0x00000000e07179c0> (a java.lang.Object)
	- locked <0x00000000e02225d0> (a java.lang.Class for org.apache.geode.internal.cache.GemFireCacheImpl)
	- locked <0x00000000e02e7290> (a java.lang.Class for org.apache.geode.cache.CacheFactory)
	at org.apache.geode.distributed.internal.InternalDistributedSystem.disconnect(InternalDistributedSystem.java:1278)
	at org.apache.geode.distributed.internal.ClusterDistributionManager$DMListener.membershipFailure(ClusterDistributionManager.java:3424)
	at org.apache.geode.distributed.internal.membership.gms.mgr.GMSMembershipManager.uncleanShutdown(GMSMembershipManager.java:1554)
	at org.apache.geode.distributed.internal.membership.gms.mgr.GMSMembershipManager.lambda$forceDisconnect$3(GMSMembershipManager.java:2586)
	at org.apache.geode.distributed.internal.membership.gms.mgr.GMSMembershipManager$$Lambda$138/133092046.run(Unknown Source)
	at java.lang.Thread.run(Thread.java:748)

   Locked ownable synchronizers:
	- <0x00000000f67c8380> (a java.util.concurrent.locks.ReentrantReadWriteLock$NonfairSync)
{noformat}
{noformat}
  "RMI TCP Connection(13)-172.17.0.4" #270 daemon prio=5 os_prio=0 tid=0x00007f53c4001800 nid=0x6c6 waiting for monitor entry [0x00007f53dcffb000]
   java.lang.Thread.State: BLOCKED (on object monitor)
	at org.apache.geode.internal.cache.GemFireCacheImpl.close(GemFireCacheImpl.java:2152)
	- waiting to lock <0x00000000e02225d0> (a java.lang.Class for org.apache.geode.internal.cache.GemFireCacheImpl)
	at org.apache.geode.internal.cache.GemFireCacheImpl.close(GemFireCacheImpl.java:1998)
	at org.apache.geode.internal.cache.GemFireCacheImpl.close(GemFireCacheImpl.java:1988)
	at org.apache.geode.test.junit.rules.ServerStarterRule.stopMember(ServerStarterRule.java:95)
	at org.apache.geode.test.junit.rules.MemberStarterRule.after(MemberStarterRule.java:137)
	at org.apache.geode.test.dunit.rules.ClusterStartupRule.stopElementInsideVM(ClusterStartupRule.java:375)
	at org.apache.geode.test.junit.rules.VMProvider.lambda$stop$fe0d42dc$1(VMProvider.java:58)
	at org.apache.geode.test.junit.rules.VMProvider$$Lambda$134/26349826.run(Unknown Source)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:498)
	at org.apache.geode.test.dunit.internal.MethodInvoker.executeObject(MethodInvoker.java:123)
	at org.apache.geode.test.dunit.internal.RemoteDUnitVM.executeMethodOnObject(RemoteDUnitVM.java:69)
	at sun.reflect.GeneratedMethodAccessor9.invoke(Unknown Source)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:498)
	at sun.rmi.server.UnicastServerRef.dispatch(UnicastServerRef.java:357)
	at sun.rmi.transport.Transport$1.run(Transport.java:200)
	at sun.rmi.transport.Transport$1.run(Transport.java:197)
	at java.security.AccessController.doPrivileged(Native Method)
	at sun.rmi.transport.Transport.serviceCall(Transport.java:196)
	at sun.rmi.transport.tcp.TCPTransport.handleMessages(TCPTransport.java:573)
	at sun.rmi.transport.tcp.TCPTransport$ConnectionHandler.run0(TCPTransport.java:834)
	at sun.rmi.transport.tcp.TCPTransport$ConnectionHandler.lambda$run$0(TCPTransport.java:688)
	at sun.rmi.transport.tcp.TCPTransport$ConnectionHandler$$Lambda$8/259076937.run(Unknown Source)
	at java.security.AccessController.doPrivileged(Native Method)
	at sun.rmi.transport.tcp.TCPTransport$ConnectionHandler.run(TCPTransport.java:687)
	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
	at java.lang.Thread.run(Thread.java:748)

   Locked ownable synchronizers:
	- <0x00000000e0503e90> (a java.util.concurrent.ThreadPoolExecutor$Worker)
{noformat}


was (Author: demery):
Similar deadlock today in JDK8 CI:

{noformat}
"ReconnectThread" #166 prio=5 os_prio=0 tid=0x00007f53bcaff800 nid=0x5c7 in Object.wait() [0x00007f53998dc000]
   java.lang.Thread.State: TIMED_WAITING (on object monitor)
	at java.lang.Object.wait(Native Method)
	at org.apache.geode.distributed.internal.membership.gms.membership.GMSJoinLeave.waitForJoinResponse(GMSJoinLeave.java:475)
	- locked <0x00000000f67c8698> (a [Lorg.apache.geode.distributed.internal.membership.gms.messages.JoinResponseMessage;)
	at org.apache.geode.distributed.internal.membership.gms.membership.GMSJoinLeave.attemptToJoin(GMSJoinLeave.java:437)
	at org.apache.geode.distributed.internal.membership.gms.membership.GMSJoinLeave.join(GMSJoinLeave.java:344)
	at org.apache.geode.distributed.internal.membership.gms.mgr.GMSMembershipManager.join(GMSMembershipManager.java:659)
	at org.apache.geode.distributed.internal.membership.gms.mgr.GMSMembershipManager.joinDistributedSystem(GMSMembershipManager.java:742)
	at org.apache.geode.distributed.internal.membership.gms.Services.start(Services.java:176)
	at org.apache.geode.distributed.internal.membership.gms.GMSMemberFactory.newMembershipManager(GMSMemberFactory.java:106)
	at org.apache.geode.distributed.internal.membership.MemberFactory.newMembershipManager(MemberFactory.java:93)
	at org.apache.geode.distributed.internal.ClusterDistributionManager.<init>(ClusterDistributionManager.java:782)
	at org.apache.geode.distributed.internal.ClusterDistributionManager.<init>(ClusterDistributionManager.java:900)
	at org.apache.geode.distributed.internal.ClusterDistributionManager.create(ClusterDistributionManager.java:541)
	at org.apache.geode.distributed.internal.InternalDistributedSystem.initialize(InternalDistributedSystem.java:830)
	at org.apache.geode.distributed.internal.InternalDistributedSystem.newInstance(InternalDistributedSystem.java:442)
	at org.apache.geode.distributed.internal.InternalDistributedSystem.newInstance(InternalDistributedSystem.java:428)
	at org.apache.geode.distributed.internal.InternalDistributedSystem.connectInternal(InternalDistributedSystem.java:253)
	- locked <0x00000000e02e7280> (a java.lang.Object)
	at org.apache.geode.distributed.DistributedSystem.connect(DistributedSystem.java:164)
	at org.apache.geode.distributed.internal.InternalDistributedSystem.reconnect(InternalDistributedSystem.java:2636)
	at org.apache.geode.distributed.internal.InternalDistributedSystem.tryReconnect(InternalDistributedSystem.java:2463)
	- locked <0x00000000e07179c0> (a java.lang.Object)
	- locked <0x00000000e02225d0> (a java.lang.Class for org.apache.geode.internal.cache.GemFireCacheImpl)
	- locked <0x00000000e02e7290> (a java.lang.Class for org.apache.geode.cache.CacheFactory)
	at org.apache.geode.distributed.internal.InternalDistributedSystem.disconnect(InternalDistributedSystem.java:1278)
	at org.apache.geode.distributed.internal.ClusterDistributionManager$DMListener.membershipFailure(ClusterDistributionManager.java:3424)
	at org.apache.geode.distributed.internal.membership.gms.mgr.GMSMembershipManager.uncleanShutdown(GMSMembershipManager.java:1554)
	at org.apache.geode.distributed.internal.membership.gms.mgr.GMSMembershipManager.lambda$forceDisconnect$3(GMSMembershipManager.java:2586)
	at org.apache.geode.distributed.internal.membership.gms.mgr.GMSMembershipManager$$Lambda$138/133092046.run(Unknown Source)
	at java.lang.Thread.run(Thread.java:748)

   Locked ownable synchronizers:
	- <0x00000000f67c8380> (a java.util.concurrent.locks.ReentrantReadWriteLock$NonfairSync)
{noformat}

{noformat}
  "RMI TCP Connection(13)-172.17.0.4" #270 daemon prio=5 os_prio=0 tid=0x00007f53c4001800 nid=0x6c6 waiting for monitor entry [0x00007f53dcffb000]
   java.lang.Thread.State: BLOCKED (on object monitor)
	at org.apache.geode.internal.cache.GemFireCacheImpl.close(GemFireCacheImpl.java:2152)
	- waiting to lock <0x00000000e02225d0> (a java.lang.Class for org.apache.geode.internal.cache.GemFireCacheImpl)
	at org.apache.geode.internal.cache.GemFireCacheImpl.close(GemFireCacheImpl.java:1998)
	at org.apache.geode.internal.cache.GemFireCacheImpl.close(GemFireCacheImpl.java:1988)
	at org.apache.geode.test.junit.rules.ServerStarterRule.stopMember(ServerStarterRule.java:95)
	at org.apache.geode.test.junit.rules.MemberStarterRule.after(MemberStarterRule.java:137)
	at org.apache.geode.test.dunit.rules.ClusterStartupRule.stopElementInsideVM(ClusterStartupRule.java:375)
	at org.apache.geode.test.junit.rules.VMProvider.lambda$stop$fe0d42dc$1(VMProvider.java:58)
	at org.apache.geode.test.junit.rules.VMProvider$$Lambda$134/26349826.run(Unknown Source)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:498)
	at org.apache.geode.test.dunit.internal.MethodInvoker.executeObject(MethodInvoker.java:123)
	at org.apache.geode.test.dunit.internal.RemoteDUnitVM.executeMethodOnObject(RemoteDUnitVM.java:69)
	at sun.reflect.GeneratedMethodAccessor9.invoke(Unknown Source)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:498)
	at sun.rmi.server.UnicastServerRef.dispatch(UnicastServerRef.java:357)
	at sun.rmi.transport.Transport$1.run(Transport.java:200)
	at sun.rmi.transport.Transport$1.run(Transport.java:197)
	at java.security.AccessController.doPrivileged(Native Method)
	at sun.rmi.transport.Transport.serviceCall(Transport.java:196)
	at sun.rmi.transport.tcp.TCPTransport.handleMessages(TCPTransport.java:573)
	at sun.rmi.transport.tcp.TCPTransport$ConnectionHandler.run0(TCPTransport.java:834)
	at sun.rmi.transport.tcp.TCPTransport$ConnectionHandler.lambda$run$0(TCPTransport.java:688)
	at sun.rmi.transport.tcp.TCPTransport$ConnectionHandler$$Lambda$8/259076937.run(Unknown Source)
	at java.security.AccessController.doPrivileged(Native Method)
	at sun.rmi.transport.tcp.TCPTransport$ConnectionHandler.run(TCPTransport.java:687)
	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
	at java.lang.Thread.run(Thread.java:748)

   Locked ownable synchronizers:
	- <0x00000000e0503e90> (a java.util.concurrent.ThreadPoolExecutor$Worker)
{noformat}


> ClusterConfigLocatorRestartDUnitTest hung in CI
> -----------------------------------------------
>
>                 Key: GEODE-5676
>                 URL: https://issues.apache.org/jira/browse/GEODE-5676
>             Project: Geode
>          Issue Type: Bug
>          Components: benchmarks
>            Reporter: Dan Smith
>            Assignee: Bruce Schuchardt
>            Priority: Major
>              Labels: pull-request-available, swat
>         Attachments: callstacks.txt
>
>          Time Spent: 20m
>  Remaining Estimate: 0h
>
> This test hung in a couple of runs of DistributedTest:
>   https://concourse.apachegeode-ci.info/teams/staging/pipelines/concourse-staging/jobs/DistributedTest/builds/430
>   https://concourse.apachegeode-ci.info/teams/staging/pipelines/concourse-staging/jobs/DistributedTest/builds/370
> {noformat}
> Started @ 2018-08-30 04:23:46.599 +0000
> 2018-08-30 04:48:33.135 +0000  org.apache.geode.management.internal.configuration.ClusterConfigLocatorRestartDUnitTest serverRestartsAfterLocatorReconnects
> Ended @ 2018-08-30 05:21:34.897 +0000
> {noformat}
> It seems to be stuck in teardown:
> {noformat}
> "ReconnectThread" #416 prio=5 os_prio=0 tid=0x00007fa86cad2000 nid=0xd07 in Object.wait() [0x00007fa744ecd000]
>    java.lang.Thread.State: TIMED_WAITING (on object monitor)
> 	at java.lang.Object.wait(Native Method)
> 	at org.apache.geode.distributed.internal.InternalDistributedSystem.reconnect(InternalDistributedSystem.java:2697)
> 	at org.apache.geode.distributed.internal.InternalDistributedSystem.tryReconnect(InternalDistributedSystem.java:2558)
> 	- locked <0x00000000e00bedc8> (a java.lang.Object)
> 	- locked <0x00000000e07af498> (a java.lang.Class for org.apache.geode.internal.cache.GemFireCacheImpl)
> 	- locked <0x00000000e00bedd8> (a java.lang.Class for org.apache.geode.cache.CacheFactory)
> 	at org.apache.geode.distributed.internal.InternalDistributedSystem.disconnect(InternalDistributedSystem.java:1041)
> 	at org.apache.geode.distributed.internal.ClusterDistributionManager$DMListener.membershipFailure(ClusterDistributionManager.java:3987)
> 	at org.apache.geode.distributed.internal.membership.gms.mgr.GMSMembershipManager.uncleanShutdown(GMSMembershipManager.java:1552)
> 	at org.apache.geode.distributed.internal.membership.gms.mgr.GMSMembershipManager.lambda$forceDisconnect$1(GMSMembershipManager.java:2564)
> 	at org.apache.geode.distributed.internal.membership.gms.mgr.GMSMembershipManager$$Lambda$81/1816825082.run(Unknown Source)
> 	at java.lang.Thread.run(Thread.java:748)
>    Locked ownable synchronizers:
> 	- None
> "RMI TCP Connection(8)-172.17.0.13" #32 daemon prio=5 os_prio=0 tid=0x00007fa874001800 nid=0x2ff waiting for monitor entry [0x00007fa8f0d15000]
>    java.lang.Thread.State: BLOCKED (on object monitor)
> 	at org.apache.geode.distributed.internal.InternalDistributedSystem.disconnect(InternalDistributedSystem.java:1367)
> 	- waiting to lock <0x00000000e07af498> (a java.lang.Class for org.apache.geode.internal.cache.GemFireCacheImpl)
> 	at org.apache.geode.distributed.internal.InternalDistributedSystem.disconnect(InternalDistributedSystem.java:1022)
> 	at org.apache.geode.test.junit.rules.MemberStarterRule.disconnectDSIfAny(MemberStarterRule.java:182)
> 	at org.apache.geode.test.junit.rules.MemberStarterRule.after(MemberStarterRule.java:129)
> 	at org.apache.geode.test.dunit.rules.ClusterStartupRule.stopElementInsideVM(ClusterStartupRule.java:385)
> 	at org.apache.geode.test.junit.rules.VMProvider.lambda$stop$fe0d42dc$1(VMProvider.java:42)
> 	at org.apache.geode.test.junit.rules.VMProvider$$Lambda$77/1844235204.run(Unknown Source)
> 	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
> 	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
> 	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
> 	at java.lang.reflect.Method.invoke(Method.java:498)
> 	at hydra.MethExecutor.executeObject(MethExecutor.java:244)
> 	at org.apache.geode.test.dunit.standalone.RemoteDUnitVM.executeMethodOnObject(RemoteDUnitVM.java:70)
> 	at sun.reflect.GeneratedMethodAccessor116.invoke(Unknown Source)
> 	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
> 	at java.lang.reflect.Method.invoke(Method.java:498)
> 	at sun.rmi.server.UnicastServerRef.dispatch(UnicastServerRef.java:357)
> 	at sun.rmi.transport.Transport$1.run(Transport.java:200)
> 	at sun.rmi.transport.Transport$1.run(Transport.java:197)
> 	at java.security.AccessController.doPrivileged(Native Method)
> 	at sun.rmi.transport.Transport.serviceCall(Transport.java:196)
> 	at sun.rmi.transport.tcp.TCPTransport.handleMessages(TCPTransport.java:573)
> 	at sun.rmi.transport.tcp.TCPTransport$ConnectionHandler.run0(TCPTransport.java:834)
> 	at sun.rmi.transport.tcp.TCPTransport$ConnectionHandler.lambda$run$0(TCPTransport.java:688)
> 	at sun.rmi.transport.tcp.TCPTransport$ConnectionHandler$$Lambda$7/137422085.run(Unknown Source)
> 	at java.security.AccessController.doPrivileged(Native Method)
> 	at sun.rmi.transport.tcp.TCPTransport$ConnectionHandler.run(TCPTransport.java:687)
> 	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
> 	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
> 	at java.lang.Thread.run(Thread.java:748)
>    Locked ownable synchronizers:
> 	- <0x00000000e0639ed0> (a java.util.concurrent.ThreadPoolExecutor$Worker)
> {noformat}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)