You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@ignite.apache.org by "Pavel Kovalenko (JIRA)" <ji...@apache.org> on 2018/06/13 16:24:00 UTC
[jira] [Created] (IGNITE-8784) Deadlock during simultaneous client
reconnect and node stop
Pavel Kovalenko created IGNITE-8784:
---------------------------------------
Summary: Deadlock during simultaneous client reconnect and node stop
Key: IGNITE-8784
URL: https://issues.apache.org/jira/browse/IGNITE-8784
Project: Ignite
Issue Type: Bug
Components: cache
Affects Versions: 2.5
Reporter: Pavel Kovalenko
Fix For: 2.6
{noformat}
[18:48:22,665][ERROR][tcp-client-disco-msg-worker-#467%client%][IgniteKernal%client] Failed to reconnect, will stop node
class org.apache.ignite.IgniteException: Failed to wait for local node joined event (grid is stopping).
at org.apache.ignite.internal.managers.discovery.GridDiscoveryManager.localJoin(GridDiscoveryManager.java:2193)
at org.apache.ignite.internal.processors.cache.GridCachePartitionExchangeManager.onKernalStart(GridCachePartitionExchangeManager.java:583)
at org.apache.ignite.internal.processors.cache.GridCacheSharedContext.onReconnected(GridCacheSharedContext.java:396)
at org.apache.ignite.internal.processors.cache.GridCacheProcessor.onReconnected(GridCacheProcessor.java:1159)
at org.apache.ignite.internal.IgniteKernal.onReconnected(IgniteKernal.java:3915)
at org.apache.ignite.internal.managers.discovery.GridDiscoveryManager$4.onDiscovery0(GridDiscoveryManager.java:830)
at org.apache.ignite.internal.managers.discovery.GridDiscoveryManager$4.onDiscovery(GridDiscoveryManager.java:589)
at org.apache.ignite.spi.discovery.tcp.ClientImpl$MessageWorker.notifyDiscovery(ClientImpl.java:2423)
at org.apache.ignite.spi.discovery.tcp.ClientImpl$MessageWorker.notifyDiscovery(ClientImpl.java:2402)
at org.apache.ignite.spi.discovery.tcp.ClientImpl$MessageWorker.processNodeAddFinishedMessage(ClientImpl.java:2047)
at org.apache.ignite.spi.discovery.tcp.ClientImpl$MessageWorker.processDiscoveryMessage(ClientImpl.java:1896)
at org.apache.ignite.spi.discovery.tcp.ClientImpl$MessageWorker.body(ClientImpl.java:1788)
at org.apache.ignite.spi.IgniteSpiThread.run(IgniteSpiThread.java:62)
Caused by: class org.apache.ignite.IgniteCheckedException: Failed to wait for local node joined event (grid is stopping).
at org.apache.ignite.internal.managers.discovery.GridDiscoveryManager.onKernalStop0(GridDiscoveryManager.java:1657)
at org.apache.ignite.internal.managers.GridManagerAdapter.onKernalStop(GridManagerAdapter.java:652)
at org.apache.ignite.internal.IgniteKernal.stop0(IgniteKernal.java:2218)
at org.apache.ignite.internal.IgniteKernal.stop(IgniteKernal.java:2166)
at org.apache.ignite.internal.IgnitionEx$IgniteNamedInstance.stop0(IgnitionEx.java:2588)
at org.apache.ignite.internal.IgnitionEx$IgniteNamedInstance.stop(IgnitionEx.java:2551)
at org.apache.ignite.internal.IgnitionEx.stop(IgnitionEx.java:372)
at org.apache.ignite.Ignition.stop(Ignition.java:229)
at org.apache.ignite.testframework.junits.GridAbstractTest.stopGrid(GridAbstractTest.java:1088)
at org.apache.ignite.testframework.junits.GridAbstractTest.stopAllGrids(GridAbstractTest.java:1128)
at org.apache.ignite.testframework.junits.GridAbstractTest.stopAllGrids(GridAbstractTest.java:1109)
at org.gridgain.grid.internal.processors.cache.database.IgniteDbSnapshotNotStableTopologiesTest.afterTest(IgniteDbSnapshotNotStableTopologiesTest.java:250)
at org.apache.ignite.testframework.junits.GridAbstractTest.tearDown(GridAbstractTest.java:1694)
at org.apache.ignite.testframework.junits.common.GridCommonAbstractTest.tearDown(GridCommonAbstractTest.java:492)
at junit.framework.TestCase.runBare(TestCase.java:146)
at junit.framework.TestResult$1.protect(TestResult.java:122)
at junit.framework.TestResult.runProtected(TestResult.java:142)
at junit.framework.TestResult.run(TestResult.java:125)
at junit.framework.TestCase.run(TestCase.java:129)
at junit.framework.TestSuite.runTest(TestSuite.java:255)
at junit.framework.TestSuite.run(TestSuite.java:250)
at junit.framework.TestSuite.runTest(TestSuite.java:255)
at junit.framework.TestSuite.run(TestSuite.java:250)
at org.junit.internal.runners.JUnit38ClassRunner.run(JUnit38ClassRunner.java:84)
at org.apache.maven.surefire.junit4.JUnit4Provider.execute(JUnit4Provider.java:369)
at org.apache.maven.surefire.junit4.JUnit4Provider.executeWithRerun(JUnit4Provider.java:275)
at org.apache.maven.surefire.junit4.JUnit4Provider.executeTestSet(JUnit4Provider.java:239)
at org.apache.maven.surefire.junit4.JUnit4Provider.invoke(JUnit4Provider.java:160)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.maven.surefire.util.ReflectionUtils.invokeMethodWithArray2(ReflectionUtils.java:206)
at org.apache.maven.surefire.booter.ProviderFactory$ProviderProxy.invoke(ProviderFactory.java:160)
at org.apache.maven.surefire.booter.ProviderFactory.invokeProvider(ProviderFactory.java:83)
at org.apache.maven.plugin.surefire.InPluginVMSurefireStarter.runSuitesInProcess(InPluginVMSurefireStarter.java:84)
at org.apache.maven.plugin.surefire.AbstractSurefireMojo.executeProvider(AbstractSurefireMojo.java:1107)
at org.apache.maven.plugin.surefire.AbstractSurefireMojo.executeAfterPreconditionsChecked(AbstractSurefireMojo.java:954)
at org.apache.maven.plugin.surefire.AbstractSurefireMojo.execute(AbstractSurefireMojo.java:832)
at org.apache.maven.plugin.DefaultBuildPluginManager.executeMojo(DefaultBuildPluginManager.java:137)
at org.apache.maven.lifecycle.internal.MojoExecutor.execute(MojoExecutor.java:208)
at org.apache.maven.lifecycle.internal.MojoExecutor.execute(MojoExecutor.java:154)
at org.apache.maven.lifecycle.internal.MojoExecutor.execute(MojoExecutor.java:146)
at org.apache.maven.lifecycle.internal.LifecycleModuleBuilder.buildProject(LifecycleModuleBuilder.java:117)
at org.apache.maven.lifecycle.internal.LifecycleModuleBuilder.buildProject(LifecycleModuleBuilder.java:81)
at org.apache.maven.lifecycle.internal.builder.singlethreaded.SingleThreadedBuilder.build(SingleThreadedBuilder.java:56)
at org.apache.maven.lifecycle.internal.LifecycleStarter.execute(LifecycleStarter.java:128)
at org.apache.maven.DefaultMaven.doExecute(DefaultMaven.java:305)
at org.apache.maven.DefaultMaven.doExecute(DefaultMaven.java:192)
at org.apache.maven.DefaultMaven.execute(DefaultMaven.java:105)
at org.apache.maven.cli.MavenCli.execute(MavenCli.java:956)
at org.apache.maven.cli.MavenCli.doMain(MavenCli.java:290)
at org.apache.maven.cli.MavenCli.main(MavenCli.java:194)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.codehaus.plexus.classworlds.launcher.Launcher.launchEnhanced(Launcher.java:289)
at org.codehaus.plexus.classworlds.launcher.Launcher.launch(Launcher.java:229)
at org.codehaus.plexus.classworlds.launcher.Launcher.mainWithExitCode(Launcher.java:415)
at org.codehaus.plexus.classworlds.launcher.Launcher.main(Launcher.java:356)
{noformat}
{noformat}
"main" #1 prio=5 os_prio=0 tid=0x00007fd36000e800 nid=0x53c2 in Object.wait() [0x00007fd3680df000]
java.lang.Thread.State: WAITING (on object monitor)
at java.lang.Object.wait(Native Method)
at java.lang.Thread.join(Thread.java:1252)
- locked <0x00000007aacf7fd0> (a org.apache.ignite.spi.discovery.tcp.ClientImpl$MessageWorker)
at java.lang.Thread.join(Thread.java:1326)
at org.apache.ignite.internal.util.IgniteUtils.join(IgniteUtils.java:4608)
at org.apache.ignite.spi.discovery.tcp.ClientImpl.spiStop(ClientImpl.java:313)
at org.apache.ignite.spi.discovery.tcp.TcpDiscoverySpi.spiStop(TcpDiscoverySpi.java:2079)
at org.apache.ignite.internal.managers.GridManagerAdapter.stopSpi(GridManagerAdapter.java:330)
at org.apache.ignite.internal.managers.discovery.GridDiscoveryManager.stop(GridDiscoveryManager.java:1675)
at org.apache.ignite.internal.IgniteKernal.stop0(IgniteKernal.java:2288)
at org.apache.ignite.internal.IgniteKernal.stop(IgniteKernal.java:2166)
at org.apache.ignite.internal.IgnitionEx$IgniteNamedInstance.stop0(IgnitionEx.java:2588)
- locked <0x00000007aacf8170> (a org.apache.ignite.internal.IgnitionEx$IgniteNamedInstance)
at org.apache.ignite.internal.IgnitionEx$IgniteNamedInstance.stop(IgnitionEx.java:2551)
at org.apache.ignite.internal.IgnitionEx.stop(IgnitionEx.java:372)
at org.apache.ignite.Ignition.stop(Ignition.java:229)
at org.apache.ignite.testframework.junits.GridAbstractTest.stopGrid(GridAbstractTest.java:1088)
at org.apache.ignite.testframework.junits.GridAbstractTest.stopAllGrids(GridAbstractTest.java:1128)
at org.apache.ignite.testframework.junits.GridAbstractTest.stopAllGrids(GridAbstractTest.java:1109)
at org.gridgain.grid.internal.processors.cache.database.IgniteDbSnapshotNotStableTopologiesTest.afterTest(IgniteDbSnapshotNotStableTopologiesTest.java:250)
at org.apache.ignite.testframework.junits.GridAbstractTest.tearDown(GridAbstractTest.java:1694)
at org.apache.ignite.testframework.junits.common.GridCommonAbstractTest.tearDown(GridCommonAbstractTest.java:492)
at junit.framework.TestCase.runBare(TestCase.java:146)
at junit.framework.TestResult$1.protect(TestResult.java:122)
at junit.framework.TestResult.runProtected(TestResult.java:142)
at junit.framework.TestResult.run(TestResult.java:125)
at junit.framework.TestCase.run(TestCase.java:129)
at junit.framework.TestSuite.runTest(TestSuite.java:255)
at junit.framework.TestSuite.run(TestSuite.java:250)
at junit.framework.TestSuite.runTest(TestSuite.java:255)
at junit.framework.TestSuite.run(TestSuite.java:250)
at org.junit.internal.runners.JUnit38ClassRunner.run(JUnit38ClassRunner.java:84)
at org.apache.maven.surefire.junit4.JUnit4Provider.execute(JUnit4Provider.java:369)
at org.apache.maven.surefire.junit4.JUnit4Provider.executeWithRerun(JUnit4Provider.java:275)
at org.apache.maven.surefire.junit4.JUnit4Provider.executeTestSet(JUnit4Provider.java:239)
at org.apache.maven.surefire.junit4.JUnit4Provider.invoke(JUnit4Provider.java:160)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.maven.surefire.util.ReflectionUtils.invokeMethodWithArray2(ReflectionUtils.java:206)
at org.apache.maven.surefire.booter.ProviderFactory$ProviderProxy.invoke(ProviderFactory.java:160)
at org.apache.maven.surefire.booter.ProviderFactory.invokeProvider(ProviderFactory.java:83)
at org.apache.maven.plugin.surefire.InPluginVMSurefireStarter.runSuitesInProcess(InPluginVMSurefireStarter.java:84)
at org.apache.maven.plugin.surefire.AbstractSurefireMojo.executeProvider(AbstractSurefireMojo.java:1107)
at org.apache.maven.plugin.surefire.AbstractSurefireMojo.executeAfterPreconditionsChecked(AbstractSurefireMojo.java:954)
at org.apache.maven.plugin.surefire.AbstractSurefireMojo.execute(AbstractSurefireMojo.java:832)
at org.apache.maven.plugin.DefaultBuildPluginManager.executeMojo(DefaultBuildPluginManager.java:137)
at org.apache.maven.lifecycle.internal.MojoExecutor.execute(MojoExecutor.java:208)
at org.apache.maven.lifecycle.internal.MojoExecutor.execute(MojoExecutor.java:154)
at org.apache.maven.lifecycle.internal.MojoExecutor.execute(MojoExecutor.java:146)
at org.apache.maven.lifecycle.internal.LifecycleModuleBuilder.buildProject(LifecycleModuleBuilder.java:117)
at org.apache.maven.lifecycle.internal.LifecycleModuleBuilder.buildProject(LifecycleModuleBuilder.java:81)
at org.apache.maven.lifecycle.internal.builder.singlethreaded.SingleThreadedBuilder.build(SingleThreadedBuilder.java:56)
at org.apache.maven.lifecycle.internal.LifecycleStarter.execute(LifecycleStarter.java:128)
at org.apache.maven.DefaultMaven.doExecute(DefaultMaven.java:305)
at org.apache.maven.DefaultMaven.doExecute(DefaultMaven.java:192)
at org.apache.maven.DefaultMaven.execute(DefaultMaven.java:105)
at org.apache.maven.cli.MavenCli.execute(MavenCli.java:956)
at org.apache.maven.cli.MavenCli.doMain(MavenCli.java:290)
at org.apache.maven.cli.MavenCli.main(MavenCli.java:194)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.codehaus.plexus.classworlds.launcher.Launcher.launchEnhanced(Launcher.java:289)
at org.codehaus.plexus.classworlds.launcher.Launcher.launch(Launcher.java:229)
at org.codehaus.plexus.classworlds.launcher.Launcher.mainWithExitCode(Launcher.java:415)
at org.codehaus.plexus.classworlds.launcher.Launcher.main(Launcher.java:356)
{noformat}
{noformat}
"tcp-client-disco-msg-worker-#467%client%" #8800 prio=5 os_prio=0 tid=0x00007fd362555000 nid=0x76df waiting for monitor entry [0x00007fd1cb6f7000]
java.lang.Thread.State: BLOCKED (on object monitor)
at org.apache.ignite.internal.IgnitionEx$IgniteNamedInstance.stop0(IgnitionEx.java:2559)
- waiting to lock <0x00000007aacf8170> (a org.apache.ignite.internal.IgnitionEx$IgniteNamedInstance)
at org.apache.ignite.internal.IgnitionEx$IgniteNamedInstance.stop(IgnitionEx.java:2551)
at org.apache.ignite.internal.IgnitionEx.stop(IgnitionEx.java:372)
at org.apache.ignite.Ignition.stop(Ignition.java:229)
at org.apache.ignite.internal.IgniteKernal.close(IgniteKernal.java:3487)
at org.apache.ignite.internal.IgniteKernal.onReconnected(IgniteKernal.java:3974)
at org.apache.ignite.internal.managers.discovery.GridDiscoveryManager$4.onDiscovery0(GridDiscoveryManager.java:830)
at org.apache.ignite.internal.managers.discovery.GridDiscoveryManager$4.onDiscovery(GridDiscoveryManager.java:589)
- locked <0x00000007a9caf4e8> (a java.lang.Object)
at org.apache.ignite.spi.discovery.tcp.ClientImpl$MessageWorker.notifyDiscovery(ClientImpl.java:2423)
at org.apache.ignite.spi.discovery.tcp.ClientImpl$MessageWorker.notifyDiscovery(ClientImpl.java:2402)
at org.apache.ignite.spi.discovery.tcp.ClientImpl$MessageWorker.processNodeAddFinishedMessage(ClientImpl.java:2047)
at org.apache.ignite.spi.discovery.tcp.ClientImpl$MessageWorker.processDiscoveryMessage(ClientImpl.java:1896)
at org.apache.ignite.spi.discovery.tcp.ClientImpl$MessageWorker.body(ClientImpl.java:1788)
at org.apache.ignite.spi.IgniteSpiThread.run(IgniteSpiThread.java:62)
{noformat}
Possible solution:
Complete local join future at DiscoveryManager with NodeStoppingException and don't invoke close() if node is already stopping.
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)