You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@geode.apache.org by "Dale Emery (JIRA)" <ji...@apache.org> on 2019/02/26 19:44:00 UTC
[jira] [Comment Edited] (GEODE-5676)
ClusterConfigLocatorRestartDUnitTest hung in CI
[ https://issues.apache.org/jira/browse/GEODE-5676?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16778491#comment-16778491 ]
Dale Emery edited comment on GEODE-5676 at 2/26/19 7:43 PM:
------------------------------------------------------------
Similar failure today in JDK8 CI:
{noformat}
"ReconnectThread" #166 prio=5 os_prio=0 tid=0x00007f53bcaff800 nid=0x5c7 in Object.wait() [0x00007f53998dc000]
java.lang.Thread.State: TIMED_WAITING (on object monitor)
at java.lang.Object.wait(Native Method)
at org.apache.geode.distributed.internal.membership.gms.membership.GMSJoinLeave.waitForJoinResponse(GMSJoinLeave.java:475)
- locked <0x00000000f67c8698> (a [Lorg.apache.geode.distributed.internal.membership.gms.messages.JoinResponseMessage;)
at org.apache.geode.distributed.internal.membership.gms.membership.GMSJoinLeave.attemptToJoin(GMSJoinLeave.java:437)
at org.apache.geode.distributed.internal.membership.gms.membership.GMSJoinLeave.join(GMSJoinLeave.java:344)
at org.apache.geode.distributed.internal.membership.gms.mgr.GMSMembershipManager.join(GMSMembershipManager.java:659)
at org.apache.geode.distributed.internal.membership.gms.mgr.GMSMembershipManager.joinDistributedSystem(GMSMembershipManager.java:742)
at org.apache.geode.distributed.internal.membership.gms.Services.start(Services.java:176)
at org.apache.geode.distributed.internal.membership.gms.GMSMemberFactory.newMembershipManager(GMSMemberFactory.java:106)
at org.apache.geode.distributed.internal.membership.MemberFactory.newMembershipManager(MemberFactory.java:93)
at org.apache.geode.distributed.internal.ClusterDistributionManager.<init>(ClusterDistributionManager.java:782)
at org.apache.geode.distributed.internal.ClusterDistributionManager.<init>(ClusterDistributionManager.java:900)
at org.apache.geode.distributed.internal.ClusterDistributionManager.create(ClusterDistributionManager.java:541)
at org.apache.geode.distributed.internal.InternalDistributedSystem.initialize(InternalDistributedSystem.java:830)
at org.apache.geode.distributed.internal.InternalDistributedSystem.newInstance(InternalDistributedSystem.java:442)
at org.apache.geode.distributed.internal.InternalDistributedSystem.newInstance(InternalDistributedSystem.java:428)
at org.apache.geode.distributed.internal.InternalDistributedSystem.connectInternal(InternalDistributedSystem.java:253)
- locked <0x00000000e02e7280> (a java.lang.Object)
at org.apache.geode.distributed.DistributedSystem.connect(DistributedSystem.java:164)
at org.apache.geode.distributed.internal.InternalDistributedSystem.reconnect(InternalDistributedSystem.java:2636)
at org.apache.geode.distributed.internal.InternalDistributedSystem.tryReconnect(InternalDistributedSystem.java:2463)
- locked <0x00000000e07179c0> (a java.lang.Object)
- locked <0x00000000e02225d0> (a java.lang.Class for org.apache.geode.internal.cache.GemFireCacheImpl)
- locked <0x00000000e02e7290> (a java.lang.Class for org.apache.geode.cache.CacheFactory)
at org.apache.geode.distributed.internal.InternalDistributedSystem.disconnect(InternalDistributedSystem.java:1278)
at org.apache.geode.distributed.internal.ClusterDistributionManager$DMListener.membershipFailure(ClusterDistributionManager.java:3424)
at org.apache.geode.distributed.internal.membership.gms.mgr.GMSMembershipManager.uncleanShutdown(GMSMembershipManager.java:1554)
at org.apache.geode.distributed.internal.membership.gms.mgr.GMSMembershipManager.lambda$forceDisconnect$3(GMSMembershipManager.java:2586)
at org.apache.geode.distributed.internal.membership.gms.mgr.GMSMembershipManager$$Lambda$138/133092046.run(Unknown Source)
at java.lang.Thread.run(Thread.java:748)
Locked ownable synchronizers:
- <0x00000000f67c8380> (a java.util.concurrent.locks.ReentrantReadWriteLock$NonfairSync)
{noformat}
{noformat}
"RMI TCP Connection(13)-172.17.0.4" #270 daemon prio=5 os_prio=0 tid=0x00007f53c4001800 nid=0x6c6 waiting for monitor entry [0x00007f53dcffb000]
java.lang.Thread.State: BLOCKED (on object monitor)
at org.apache.geode.internal.cache.GemFireCacheImpl.close(GemFireCacheImpl.java:2152)
- waiting to lock <0x00000000e02225d0> (a java.lang.Class for org.apache.geode.internal.cache.GemFireCacheImpl)
at org.apache.geode.internal.cache.GemFireCacheImpl.close(GemFireCacheImpl.java:1998)
at org.apache.geode.internal.cache.GemFireCacheImpl.close(GemFireCacheImpl.java:1988)
at org.apache.geode.test.junit.rules.ServerStarterRule.stopMember(ServerStarterRule.java:95)
at org.apache.geode.test.junit.rules.MemberStarterRule.after(MemberStarterRule.java:137)
at org.apache.geode.test.dunit.rules.ClusterStartupRule.stopElementInsideVM(ClusterStartupRule.java:375)
at org.apache.geode.test.junit.rules.VMProvider.lambda$stop$fe0d42dc$1(VMProvider.java:58)
at org.apache.geode.test.junit.rules.VMProvider$$Lambda$134/26349826.run(Unknown Source)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.geode.test.dunit.internal.MethodInvoker.executeObject(MethodInvoker.java:123)
at org.apache.geode.test.dunit.internal.RemoteDUnitVM.executeMethodOnObject(RemoteDUnitVM.java:69)
at sun.reflect.GeneratedMethodAccessor9.invoke(Unknown Source)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at sun.rmi.server.UnicastServerRef.dispatch(UnicastServerRef.java:357)
at sun.rmi.transport.Transport$1.run(Transport.java:200)
at sun.rmi.transport.Transport$1.run(Transport.java:197)
at java.security.AccessController.doPrivileged(Native Method)
at sun.rmi.transport.Transport.serviceCall(Transport.java:196)
at sun.rmi.transport.tcp.TCPTransport.handleMessages(TCPTransport.java:573)
at sun.rmi.transport.tcp.TCPTransport$ConnectionHandler.run0(TCPTransport.java:834)
at sun.rmi.transport.tcp.TCPTransport$ConnectionHandler.lambda$run$0(TCPTransport.java:688)
at sun.rmi.transport.tcp.TCPTransport$ConnectionHandler$$Lambda$8/259076937.run(Unknown Source)
at java.security.AccessController.doPrivileged(Native Method)
at sun.rmi.transport.tcp.TCPTransport$ConnectionHandler.run(TCPTransport.java:687)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Locked ownable synchronizers:
- <0x00000000e0503e90> (a java.util.concurrent.ThreadPoolExecutor$Worker)
{noformat}
was (Author: demery):
Similar deadlock today in JDK8 CI:
{noformat}
"ReconnectThread" #166 prio=5 os_prio=0 tid=0x00007f53bcaff800 nid=0x5c7 in Object.wait() [0x00007f53998dc000]
java.lang.Thread.State: TIMED_WAITING (on object monitor)
at java.lang.Object.wait(Native Method)
at org.apache.geode.distributed.internal.membership.gms.membership.GMSJoinLeave.waitForJoinResponse(GMSJoinLeave.java:475)
- locked <0x00000000f67c8698> (a [Lorg.apache.geode.distributed.internal.membership.gms.messages.JoinResponseMessage;)
at org.apache.geode.distributed.internal.membership.gms.membership.GMSJoinLeave.attemptToJoin(GMSJoinLeave.java:437)
at org.apache.geode.distributed.internal.membership.gms.membership.GMSJoinLeave.join(GMSJoinLeave.java:344)
at org.apache.geode.distributed.internal.membership.gms.mgr.GMSMembershipManager.join(GMSMembershipManager.java:659)
at org.apache.geode.distributed.internal.membership.gms.mgr.GMSMembershipManager.joinDistributedSystem(GMSMembershipManager.java:742)
at org.apache.geode.distributed.internal.membership.gms.Services.start(Services.java:176)
at org.apache.geode.distributed.internal.membership.gms.GMSMemberFactory.newMembershipManager(GMSMemberFactory.java:106)
at org.apache.geode.distributed.internal.membership.MemberFactory.newMembershipManager(MemberFactory.java:93)
at org.apache.geode.distributed.internal.ClusterDistributionManager.<init>(ClusterDistributionManager.java:782)
at org.apache.geode.distributed.internal.ClusterDistributionManager.<init>(ClusterDistributionManager.java:900)
at org.apache.geode.distributed.internal.ClusterDistributionManager.create(ClusterDistributionManager.java:541)
at org.apache.geode.distributed.internal.InternalDistributedSystem.initialize(InternalDistributedSystem.java:830)
at org.apache.geode.distributed.internal.InternalDistributedSystem.newInstance(InternalDistributedSystem.java:442)
at org.apache.geode.distributed.internal.InternalDistributedSystem.newInstance(InternalDistributedSystem.java:428)
at org.apache.geode.distributed.internal.InternalDistributedSystem.connectInternal(InternalDistributedSystem.java:253)
- locked <0x00000000e02e7280> (a java.lang.Object)
at org.apache.geode.distributed.DistributedSystem.connect(DistributedSystem.java:164)
at org.apache.geode.distributed.internal.InternalDistributedSystem.reconnect(InternalDistributedSystem.java:2636)
at org.apache.geode.distributed.internal.InternalDistributedSystem.tryReconnect(InternalDistributedSystem.java:2463)
- locked <0x00000000e07179c0> (a java.lang.Object)
- locked <0x00000000e02225d0> (a java.lang.Class for org.apache.geode.internal.cache.GemFireCacheImpl)
- locked <0x00000000e02e7290> (a java.lang.Class for org.apache.geode.cache.CacheFactory)
at org.apache.geode.distributed.internal.InternalDistributedSystem.disconnect(InternalDistributedSystem.java:1278)
at org.apache.geode.distributed.internal.ClusterDistributionManager$DMListener.membershipFailure(ClusterDistributionManager.java:3424)
at org.apache.geode.distributed.internal.membership.gms.mgr.GMSMembershipManager.uncleanShutdown(GMSMembershipManager.java:1554)
at org.apache.geode.distributed.internal.membership.gms.mgr.GMSMembershipManager.lambda$forceDisconnect$3(GMSMembershipManager.java:2586)
at org.apache.geode.distributed.internal.membership.gms.mgr.GMSMembershipManager$$Lambda$138/133092046.run(Unknown Source)
at java.lang.Thread.run(Thread.java:748)
Locked ownable synchronizers:
- <0x00000000f67c8380> (a java.util.concurrent.locks.ReentrantReadWriteLock$NonfairSync)
{noformat}
{noformat}
"RMI TCP Connection(13)-172.17.0.4" #270 daemon prio=5 os_prio=0 tid=0x00007f53c4001800 nid=0x6c6 waiting for monitor entry [0x00007f53dcffb000]
java.lang.Thread.State: BLOCKED (on object monitor)
at org.apache.geode.internal.cache.GemFireCacheImpl.close(GemFireCacheImpl.java:2152)
- waiting to lock <0x00000000e02225d0> (a java.lang.Class for org.apache.geode.internal.cache.GemFireCacheImpl)
at org.apache.geode.internal.cache.GemFireCacheImpl.close(GemFireCacheImpl.java:1998)
at org.apache.geode.internal.cache.GemFireCacheImpl.close(GemFireCacheImpl.java:1988)
at org.apache.geode.test.junit.rules.ServerStarterRule.stopMember(ServerStarterRule.java:95)
at org.apache.geode.test.junit.rules.MemberStarterRule.after(MemberStarterRule.java:137)
at org.apache.geode.test.dunit.rules.ClusterStartupRule.stopElementInsideVM(ClusterStartupRule.java:375)
at org.apache.geode.test.junit.rules.VMProvider.lambda$stop$fe0d42dc$1(VMProvider.java:58)
at org.apache.geode.test.junit.rules.VMProvider$$Lambda$134/26349826.run(Unknown Source)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.geode.test.dunit.internal.MethodInvoker.executeObject(MethodInvoker.java:123)
at org.apache.geode.test.dunit.internal.RemoteDUnitVM.executeMethodOnObject(RemoteDUnitVM.java:69)
at sun.reflect.GeneratedMethodAccessor9.invoke(Unknown Source)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at sun.rmi.server.UnicastServerRef.dispatch(UnicastServerRef.java:357)
at sun.rmi.transport.Transport$1.run(Transport.java:200)
at sun.rmi.transport.Transport$1.run(Transport.java:197)
at java.security.AccessController.doPrivileged(Native Method)
at sun.rmi.transport.Transport.serviceCall(Transport.java:196)
at sun.rmi.transport.tcp.TCPTransport.handleMessages(TCPTransport.java:573)
at sun.rmi.transport.tcp.TCPTransport$ConnectionHandler.run0(TCPTransport.java:834)
at sun.rmi.transport.tcp.TCPTransport$ConnectionHandler.lambda$run$0(TCPTransport.java:688)
at sun.rmi.transport.tcp.TCPTransport$ConnectionHandler$$Lambda$8/259076937.run(Unknown Source)
at java.security.AccessController.doPrivileged(Native Method)
at sun.rmi.transport.tcp.TCPTransport$ConnectionHandler.run(TCPTransport.java:687)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Locked ownable synchronizers:
- <0x00000000e0503e90> (a java.util.concurrent.ThreadPoolExecutor$Worker)
{noformat}
> ClusterConfigLocatorRestartDUnitTest hung in CI
> -----------------------------------------------
>
> Key: GEODE-5676
> URL: https://issues.apache.org/jira/browse/GEODE-5676
> Project: Geode
> Issue Type: Bug
> Components: benchmarks
> Reporter: Dan Smith
> Assignee: Bruce Schuchardt
> Priority: Major
> Labels: pull-request-available, swat
> Attachments: callstacks.txt
>
> Time Spent: 20m
> Remaining Estimate: 0h
>
> This test hung in a couple of runs of DistributedTest:
> https://concourse.apachegeode-ci.info/teams/staging/pipelines/concourse-staging/jobs/DistributedTest/builds/430
> https://concourse.apachegeode-ci.info/teams/staging/pipelines/concourse-staging/jobs/DistributedTest/builds/370
> {noformat}
> Started @ 2018-08-30 04:23:46.599 +0000
> 2018-08-30 04:48:33.135 +0000 org.apache.geode.management.internal.configuration.ClusterConfigLocatorRestartDUnitTest serverRestartsAfterLocatorReconnects
> Ended @ 2018-08-30 05:21:34.897 +0000
> {noformat}
> It seems to be stuck in teardown:
> {noformat}
> "ReconnectThread" #416 prio=5 os_prio=0 tid=0x00007fa86cad2000 nid=0xd07 in Object.wait() [0x00007fa744ecd000]
> java.lang.Thread.State: TIMED_WAITING (on object monitor)
> at java.lang.Object.wait(Native Method)
> at org.apache.geode.distributed.internal.InternalDistributedSystem.reconnect(InternalDistributedSystem.java:2697)
> at org.apache.geode.distributed.internal.InternalDistributedSystem.tryReconnect(InternalDistributedSystem.java:2558)
> - locked <0x00000000e00bedc8> (a java.lang.Object)
> - locked <0x00000000e07af498> (a java.lang.Class for org.apache.geode.internal.cache.GemFireCacheImpl)
> - locked <0x00000000e00bedd8> (a java.lang.Class for org.apache.geode.cache.CacheFactory)
> at org.apache.geode.distributed.internal.InternalDistributedSystem.disconnect(InternalDistributedSystem.java:1041)
> at org.apache.geode.distributed.internal.ClusterDistributionManager$DMListener.membershipFailure(ClusterDistributionManager.java:3987)
> at org.apache.geode.distributed.internal.membership.gms.mgr.GMSMembershipManager.uncleanShutdown(GMSMembershipManager.java:1552)
> at org.apache.geode.distributed.internal.membership.gms.mgr.GMSMembershipManager.lambda$forceDisconnect$1(GMSMembershipManager.java:2564)
> at org.apache.geode.distributed.internal.membership.gms.mgr.GMSMembershipManager$$Lambda$81/1816825082.run(Unknown Source)
> at java.lang.Thread.run(Thread.java:748)
> Locked ownable synchronizers:
> - None
> "RMI TCP Connection(8)-172.17.0.13" #32 daemon prio=5 os_prio=0 tid=0x00007fa874001800 nid=0x2ff waiting for monitor entry [0x00007fa8f0d15000]
> java.lang.Thread.State: BLOCKED (on object monitor)
> at org.apache.geode.distributed.internal.InternalDistributedSystem.disconnect(InternalDistributedSystem.java:1367)
> - waiting to lock <0x00000000e07af498> (a java.lang.Class for org.apache.geode.internal.cache.GemFireCacheImpl)
> at org.apache.geode.distributed.internal.InternalDistributedSystem.disconnect(InternalDistributedSystem.java:1022)
> at org.apache.geode.test.junit.rules.MemberStarterRule.disconnectDSIfAny(MemberStarterRule.java:182)
> at org.apache.geode.test.junit.rules.MemberStarterRule.after(MemberStarterRule.java:129)
> at org.apache.geode.test.dunit.rules.ClusterStartupRule.stopElementInsideVM(ClusterStartupRule.java:385)
> at org.apache.geode.test.junit.rules.VMProvider.lambda$stop$fe0d42dc$1(VMProvider.java:42)
> at org.apache.geode.test.junit.rules.VMProvider$$Lambda$77/1844235204.run(Unknown Source)
> at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
> at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
> at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
> at java.lang.reflect.Method.invoke(Method.java:498)
> at hydra.MethExecutor.executeObject(MethExecutor.java:244)
> at org.apache.geode.test.dunit.standalone.RemoteDUnitVM.executeMethodOnObject(RemoteDUnitVM.java:70)
> at sun.reflect.GeneratedMethodAccessor116.invoke(Unknown Source)
> at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
> at java.lang.reflect.Method.invoke(Method.java:498)
> at sun.rmi.server.UnicastServerRef.dispatch(UnicastServerRef.java:357)
> at sun.rmi.transport.Transport$1.run(Transport.java:200)
> at sun.rmi.transport.Transport$1.run(Transport.java:197)
> at java.security.AccessController.doPrivileged(Native Method)
> at sun.rmi.transport.Transport.serviceCall(Transport.java:196)
> at sun.rmi.transport.tcp.TCPTransport.handleMessages(TCPTransport.java:573)
> at sun.rmi.transport.tcp.TCPTransport$ConnectionHandler.run0(TCPTransport.java:834)
> at sun.rmi.transport.tcp.TCPTransport$ConnectionHandler.lambda$run$0(TCPTransport.java:688)
> at sun.rmi.transport.tcp.TCPTransport$ConnectionHandler$$Lambda$7/137422085.run(Unknown Source)
> at java.security.AccessController.doPrivileged(Native Method)
> at sun.rmi.transport.tcp.TCPTransport$ConnectionHandler.run(TCPTransport.java:687)
> at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
> at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
> at java.lang.Thread.run(Thread.java:748)
> Locked ownable synchronizers:
> - <0x00000000e0639ed0> (a java.util.concurrent.ThreadPoolExecutor$Worker)
> {noformat}
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)