You are viewing a plain text version of this content. The canonical link for it is here.
Posted to user@ignite.apache.org by dark <ek...@gmail.com> on 2017/11/12 12:17:09 UTC

[Ignite 2.0.0] Stopping the node in order to prevent cluster wide instability.

Hi team,

I have a problem with my Ignite cluster.

Nodes die at roughly 10-hour intervals. Also, once the cluster is formed,
only one node is heavily used, which seems to have some influence.
The log from when the issue occurs is shown below.

[08:11:15,903][ERROR][tcp-disco-msg-worker-#2%null%][TcpDiscoverySpi]
TcpDiscoverSpi's message worker thread failed abnormally. Stopping the node
in order to prevent cluster wide instability.
java.lang.NullPointerException
        at
org.apache.ignite.internal.processors.cache.IgniteCacheOffheapManagerImpl.entriesCount(IgniteCacheOffheapManagerImpl.java:250)
        at
org.apache.ignite.internal.processors.cache.CacheMetricsImpl.getOffHeapPrimaryEntriesCount(CacheMetricsImpl.java:198)
        at
org.apache.ignite.internal.processors.cache.CacheMetricsSnapshot.<init>(CacheMetricsSnapshot.java:246)
        at
org.apache.ignite.internal.processors.cache.GridCacheAdapter.localMetrics(GridCacheAdapter.java:3211)
        at
org.apache.ignite.internal.managers.discovery.GridDiscoveryManager$7.cacheMetrics(GridDiscoveryManager.java:975)
        at
org.apache.ignite.internal.managers.discovery.GridDiscoveryManager$7.nonHeapMemoryUsed(GridDiscoveryManager.java:945)
        at
org.apache.ignite.internal.managers.discovery.GridDiscoveryManager$7.metrics(GridDiscoveryManager.java:911)
        at
org.apache.ignite.spi.discovery.tcp.ServerImpl$RingMessageWorker.processMetricsUpdateMessage(ServerImpl.java:4946)
        at
org.apache.ignite.spi.discovery.tcp.ServerImpl$RingMessageWorker.processMessage(ServerImpl.java:2562)
        at
org.apache.ignite.spi.discovery.tcp.ServerImpl$RingMessageWorker.processMessage(ServerImpl.java:2364)
        at
org.apache.ignite.spi.discovery.tcp.ServerImpl$MessageWorkerAdapter.body(ServerImpl.java:6460)
        at
org.apache.ignite.spi.discovery.tcp.ServerImpl$RingMessageWorker.body(ServerImpl.java:2450)
        at
org.apache.ignite.spi.IgniteSpiThread.run(IgniteSpiThread.java:62)
[08:11:15,941][ERROR][tcp-disco-msg-worker-#2%null%][TcpDiscoverySpi]
Runtime error caught during grid runnable execution: IgniteSpiThread
[name=tcp-disco-msg-worker-#2%null%]
java.lang.NullPointerException
        at
org.apache.ignite.internal.processors.cache.IgniteCacheOffheapManagerImpl.entriesCount(IgniteCacheOffheapManagerImpl.java:250)
        at
org.apache.ignite.internal.processors.cache.CacheMetricsImpl.getOffHeapPrimaryEntriesCount(CacheMetricsImpl.java:198)
        at
org.apache.ignite.internal.processors.cache.CacheMetricsSnapshot.<init>(CacheMetricsSnapshot.java:246)
        at
org.apache.ignite.internal.processors.cache.GridCacheAdapter.localMetrics(GridCacheAdapter.java:3211)
        at
org.apache.ignite.internal.managers.discovery.GridDiscoveryManager$7.cacheMetrics(GridDiscoveryManager.java:975)
        at
org.apache.ignite.internal.managers.discovery.GridDiscoveryManager$7.nonHeapMemoryUsed(GridDiscoveryManager.java:945)
        at
org.apache.ignite.internal.managers.discovery.GridDiscoveryManager$7.metrics(GridDiscoveryManager.java:911)
        at
org.apache.ignite.spi.discovery.tcp.ServerImpl$RingMessageWorker.processMetricsUpdateMessage(ServerImpl.java:4946)
        at
org.apache.ignite.spi.discovery.tcp.ServerImpl$RingMessageWorker.processMessage(ServerImpl.java:2562)
        at
org.apache.ignite.spi.discovery.tcp.ServerImpl$RingMessageWorker.processMessage(ServerImpl.java:2364)
        at
org.apache.ignite.spi.discovery.tcp.ServerImpl$MessageWorkerAdapter.body(ServerImpl.java:6460)
        at
org.apache.ignite.spi.discovery.tcp.ServerImpl$RingMessageWorker.body(ServerImpl.java:2450)
        at
org.apache.ignite.spi.IgniteSpiThread.run(IgniteSpiThread.java:62)
[08:11:15,963][INFO ][node-stop-thread][GridTcpRestProtocol] Command
protocol successfully stopped: TCP binary
[08:11:16,093][ERROR][sys-stripe-5-#6%null%][GridCacheIoManager] Failed
processing message [senderId=6eea7d5f-ef14-4988-b69e-738d2e7c8550,
msg=GridDhtAtomicSingleUpdateRequest [key=KeyCacheObjectImpl [part=61,
val=60_1510441260, hasValBytes=true], val=null, prevVal=null,
super=GridDhtAtomicAbstractUpdateRequest [onRes=false, nearNodeId=null,
nearFutId=0, flags=]]]
java.lang.IllegalStateException: Failed to remove from index (grid is
stopping).
        at
org.apache.ignite.internal.processors.query.GridQueryProcessor.remove(GridQueryProcessor.java:1983)
        at
org.apache.ignite.internal.processors.cache.query.GridCacheQueryManager.remove(GridCacheQueryManager.java:452)
        at
org.apache.ignite.internal.processors.cache.IgniteCacheOffheapManagerImpl$CacheDataStoreImpl.finishRemove(IgniteCacheOffheapManagerImpl.java:1131)
        at
org.apache.ignite.internal.processors.cache.IgniteCacheOffheapManagerImpl$CacheDataStoreImpl.invoke(IgniteCacheOffheapManagerImpl.java:941)
        at
org.apache.ignite.internal.processors.cache.IgniteCacheOffheapManagerImpl.invoke(IgniteCacheOffheapManagerImpl.java:326)
        at
org.apache.ignite.internal.processors.cache.GridCacheMapEntry.innerUpdate(GridCacheMapEntry.java:1693)
        at
org.apache.ignite.internal.processors.cache.distributed.dht.atomic.GridDhtAtomicCache.processDhtAtomicUpdateRequest(GridDhtAtomicCache.java:3120)
        at
org.apache.ignite.internal.processors.cache.distributed.dht.atomic.GridDhtAtomicCache.access$600(GridDhtAtomicCache.java:127)
        at
org.apache.ignite.internal.processors.cache.distributed.dht.atomic.GridDhtAtomicCache$8.apply(GridDhtAtomicCache.java:319)
        at
org.apache.ignite.internal.processors.cache.distributed.dht.atomic.GridDhtAtomicCache$8.apply(GridDhtAtomicCache.java:314)
        at
org.apache.ignite.internal.processors.cache.GridCacheIoManager.processMessage(GridCacheIoManager.java:863)
        at
org.apache.ignite.internal.processors.cache.GridCacheIoManager.onMessage0(GridCacheIoManager.java:386)
        at
org.apache.ignite.internal.processors.cache.GridCacheIoManager.handleMessage(GridCacheIoManager.java:308)
        at
org.apache.ignite.internal.processors.cache.GridCacheIoManager.access$000(GridCacheIoManager.java:100)
        at
org.apache.ignite.internal.processors.cache.GridCacheIoManager$1.onMessage(GridCacheIoManager.java:253)
        at
org.apache.ignite.internal.managers.communication.GridIoManager.invokeListener(GridIoManager.java:1257)
        at
org.apache.ignite.internal.managers.communication.GridIoManager.processRegularMessage0(GridIoManager.java:885)
        at
org.apache.ignite.internal.managers.communication.GridIoManager.access$2100(GridIoManager.java:114)
        at
org.apache.ignite.internal.managers.communication.GridIoManager$7.run(GridIoManager.java:802)
        at
org.apache.ignite.internal.util.StripedExecutor$Stripe.run(StripedExecutor.java:483)
        at java.lang.Thread.run(Thread.java:745)

Thanks



--
Sent from: http://apache-ignite-users.70518.x6.nabble.com/

Re: [Ignite 2.0.0] Stopping the node in order to prevent cluster wide instability.

Posted by dark <ek...@gmail.com>.
With P2P class loading disabled, it works properly.
Thank you. :)




--
Sent from: http://apache-ignite-users.70518.x6.nabble.com/

Re: [Ignite 2.0.0] Stopping the node in order to prevent cluster wide instability.

Posted by Nikolay Izhikov <ni...@apache.org>.
Hello, Valentin.

I will try to take a look at this bug.


В Чт, 01/02/2018 в 12:35 -0700, vkulichenko пишет:
> Well, then you need IGNITE-3653 to be fixed I believe. Unfortunately, it's
> not assigned to anyone currently, so apparently no one is working on it. Are
> you willing to pick it up and contribute?
> 
> -Val
> 
> 
> 
> --
> Sent from: http://apache-ignite-users.70518.x6.nabble.com/

Re: [Ignite 2.0.0] Stopping the node in order to prevent cluster wide instability.

Posted by vkulichenko <va...@gmail.com>.
Well, then you need IGNITE-3653 to be fixed I believe. Unfortunately, it's
not assigned to anyone currently, so apparently no one is working on it. Are
you willing to pick it up and contribute?

-Val



--
Sent from: http://apache-ignite-users.70518.x6.nabble.com/

Re: [Ignite 2.0.0] Stopping the node in order to prevent cluster wide instability.

Posted by 1MoreIgniteUser <wc...@neovest.com>.
I tried that, and it does work with it disabled, but I needed
peerClassLoading enabled. We have a microservice setup, so lots of
different things interact with our Ignite cluster to get data. We
have services making continuous queries and regular SQL queries, so multiple
different apps are submitting Runnables to the Ignite cluster and so on.
We need peerClassLoading because it would take really ugly code
for us to statically tell Ignite that it's going to be asked to use so many
different classes.

Thankfully, I was able to figure this out by reading more about
peerClassLoading in the Ignite documentation:
https://apacheignite.readme.io/docs/zero-deployment

I have added a few more properties to my ignite config (and the client
connection ignite config) and it's working now.

<bean class="org.apache.ignite.configuration.IgniteConfiguration"
id="ignite.cfg">
    <property name="peerClassLoadingEnabled" value="true"/>
    <property name="deploymentMode" value="CONTINUOUS"/>
    <property name="peerClassLoadingMissedResourcesCacheSize" value="0"/>

....


So I used the CONTINUOUS deployment mode instead of the default SHARED, and
I've set peerClassLoadingMissedResourcesCacheSize to 0 to ensure that all the
nodes have the same info on the classes they need.




--
Sent from: http://apache-ignite-users.70518.x6.nabble.com/

Re: [Ignite 2.0.0] Stopping the node in order to prevent cluster wide instability.

Posted by vkulichenko <va...@gmail.com>.
This looks like this issue: https://issues.apache.org/jira/browse/IGNITE-3653

Do you have P2P class loading enabled? If yes, can you try to disable it?

-Val



--
Sent from: http://apache-ignite-users.70518.x6.nabble.com/

Re: [Ignite 2.0.0] Stopping the node in order to prevent cluster wide instability.

Posted by wcherry <wc...@neovest.com>.
I am also experiencing this issue. I'm running Ignite in a Kubernetes cluster
and I am trying to do a rolling update. I have 2 Ignite nodes running and
I am using the K8s rolling update API in a deployment. E.g., I am running an
application that starts up the 2 nodes. The nodes form a cluster, and I then
build my project through a Jenkins pipeline and use Helm to upgrade the
deployment. K8s takes over and, as part of the deployment, brings one node
down, puts it back up, waits a minute and then brings the other down and puts
it back up.

https://kubernetes.io/docs/concepts/workloads/controllers/deployment/#rolling-update-deployment

As it does this, sometimes it works and other times Ignite fails to connect
to the other node and join the cluster.
K8s brings down a node and tries to put it back up, but because that fails,
K8s stops the rolling update. So we end up with an old node running and a new
broken node.


[16:38:45]    __________  ________________
[16:38:45]   /  _/ ___/ |/ /  _/_  __/ __/
[16:38:45]  _/ // (7 7    // /  / / / _/
[16:38:45] /___/\___/_/|_/___/ /_/ /___/
[16:38:45]
[16:38:45] ver. 2.3.0#20171028-sha1:8add7fd5
[16:38:45] 2017 Copyright(C) Apache Software Foundation
[16:38:45]
[16:38:45] Ignite documentation: http://ignite.apache.org
[16:38:45]
[16:38:45] Quiet mode.
[16:38:45]   ^-- To see **FULL** console log here add -DIGNITE_QUIET=false
or "-v" to ignite.{sh|bat}
[16:38:45]
[16:38:45] OS: Linux 4.4.0-77-generic amd64
[16:38:45] VM information: Java(TM) SE Runtime Environment 1.8.0_152-b16
Oracle Corporation Java HotSpot(TM) 64-Bit Server VM 25.152-b16
[16:38:45] Configured plugins:
[16:38:45]   ^-- None
[16:38:45]
[16:38:46] Message queue limit is set to 0 which may lead to potential OOMEs
when running cache operations in FULL_ASYNC or PRIMARY_SYNC modes due to
message queues growth on sender and receiver sides.
[16:38:46] Security status [authentication=off, tls/ssl=off]

SEVERE: TcpDiscoverSpi's message worker thread failed abnormally. Stopping
the node in order to prevent cluster wide instability.
java.lang.NullPointerException
        at
org.apache.ignite.internal.processors.cache.query.continuous.CacheContinuousQueryHandlerV2.getEventFilter(CacheContinuousQueryHandlerV2.java:111)
        at
org.apache.ignite.internal.processors.cache.query.continuous.CacheContinuousQueryHandler.register(CacheContinuousQueryHandler.java:315)
        at
org.apache.ignite.internal.processors.continuous.GridContinuousProcessor.registerHandler(GridContinuousProcessor.java:1228)
        at
org.apache.ignite.internal.processors.continuous.GridContinuousProcessor.onDiscoDataReceived(GridContinuousProcessor.java:523)
        at
org.apache.ignite.internal.processors.continuous.GridContinuousProcessor.onGridDataReceived(GridContinuousProcessor.java:478)
        at
org.apache.ignite.internal.managers.discovery.GridDiscoveryManager$5.onExchange(GridDiscoveryManager.java:855)
        at
org.apache.ignite.spi.discovery.tcp.TcpDiscoverySpi.onExchange(TcpDiscoverySpi.java:1837)
        at
org.apache.ignite.spi.discovery.tcp.ServerImpl$RingMessageWorker.processNodeAddedMessage(ServerImpl.java:4328)
        at
org.apache.ignite.spi.discovery.tcp.ServerImpl$RingMessageWorker.processMessage(ServerImpl.java:2635)
        at
org.apache.ignite.spi.discovery.tcp.ServerImpl$RingMessageWorker.processMessage(ServerImpl.java:2447)
        at
org.apache.ignite.spi.discovery.tcp.ServerImpl$MessageWorkerAdapter.body(ServerImpl.java:6648)
        at
org.apache.ignite.spi.discovery.tcp.ServerImpl$RingMessageWorker.body(ServerImpl.java:2533)
        at
org.apache.ignite.spi.IgniteSpiThread.run(IgniteSpiThread.java:62)

Jan 30, 2018 4:38:48 PM org.apache.ignite.logger.java.JavaLogger error
SEVERE: Runtime error caught during grid runnable execution: IgniteSpiThread
[name=tcp-disco-msg-worker-#2]
java.lang.NullPointerException
        at
org.apache.ignite.internal.processors.cache.query.continuous.CacheContinuousQueryHandlerV2.getEventFilter(CacheContinuousQueryHandlerV2.java:111)
        at
org.apache.ignite.internal.processors.cache.query.continuous.CacheContinuousQueryHandler.register(CacheContinuousQueryHandler.java:315)
        at
org.apache.ignite.internal.processors.continuous.GridContinuousProcessor.registerHandler(GridContinuousProcessor.java:1228)
        at
org.apache.ignite.internal.processors.continuous.GridContinuousProcessor.onDiscoDataReceived(GridContinuousProcessor.java:523)
        at
org.apache.ignite.internal.processors.continuous.GridContinuousProcessor.onGridDataReceived(GridContinuousProcessor.java:478)
        at
org.apache.ignite.internal.managers.discovery.GridDiscoveryManager$5.onExchange(GridDiscoveryManager.java:855)
        at
org.apache.ignite.spi.discovery.tcp.TcpDiscoverySpi.onExchange(TcpDiscoverySpi.java:1837)
        at
org.apache.ignite.spi.discovery.tcp.ServerImpl$RingMessageWorker.processNodeAddedMessage(ServerImpl.java:4328)
        at
org.apache.ignite.spi.discovery.tcp.ServerImpl$RingMessageWorker.processMessage(ServerImpl.java:2635)
        at
org.apache.ignite.spi.discovery.tcp.ServerImpl$RingMessageWorker.processMessage(ServerImpl.java:2447)
        at
org.apache.ignite.spi.discovery.tcp.ServerImpl$MessageWorkerAdapter.body(ServerImpl.java:6648)
        at
org.apache.ignite.spi.discovery.tcp.ServerImpl$RingMessageWorker.body(ServerImpl.java:2533)
        at
org.apache.ignite.spi.IgniteSpiThread.run(IgniteSpiThread.java:62)

Jan 30, 2018 4:38:48 PM org.apache.ignite.logger.java.JavaLogger error
SEVERE: Failed to start manager: GridManagerAdapter [enabled=true,
name=o.a.i.i.managers.discovery.GridDiscoveryManager]
class org.apache.ignite.IgniteCheckedException: Failed to start SPI:
TcpDiscoverySpi [addrRslvr=null, sockTimeout=5000, ackTimeout=5000,
marsh=JdkMarshaller [], reconCnt=10, maxAckTimeout=600000,
forceSrvMode=false, clientReconnectDisabled=false]
        at
org.apache.ignite.internal.managers.GridManagerAdapter.startSpi(GridManagerAdapter.java:300)
        at
org.apache.ignite.internal.managers.discovery.GridDiscoveryManager.start(GridDiscoveryManager.java:882)
        at
org.apache.ignite.internal.IgniteKernal.startManager(IgniteKernal.java:1852)
        at
org.apache.ignite.internal.IgniteKernal.start(IgniteKernal.java:1002)
        at
org.apache.ignite.internal.IgnitionEx$IgniteNamedInstance.start0(IgnitionEx.java:1909)
        at
org.apache.ignite.internal.IgnitionEx$IgniteNamedInstance.start(IgnitionEx.java:1652)
        at
org.apache.ignite.internal.IgnitionEx.start0(IgnitionEx.java:1080)
        at
org.apache.ignite.internal.IgnitionEx.startConfigurations(IgnitionEx.java:998)
        at org.apache.ignite.internal.IgnitionEx.start(IgnitionEx.java:884)
        at org.apache.ignite.internal.IgnitionEx.start(IgnitionEx.java:803)
        at org.apache.ignite.Ignition.start(Ignition.java:372)
        at
com.mycompay.source.code.IgniteNodeModule.provideIgniteCluster(IgniteNodeModule.java:24)
        at
com.mycompay.source.code.IgniteNodeModule$$FastClassByGuice$$cc13dccd.invoke(<generated>)
        at
com.google.inject.internal.ProviderMethod$FastClassProviderMethod.doProvision(ProviderMethod.java:264)
        at
com.google.inject.internal.ProviderMethod$Factory.provision(ProviderMethod.java:401)
        at
com.google.inject.internal.ProviderMethod$Factory.get(ProviderMethod.java:376)
        at
com.google.inject.internal.ProviderToInternalFactoryAdapter$1.call(ProviderToInternalFactoryAdapter.java:46)
        at
com.google.inject.internal.InjectorImpl.callInContext(InjectorImpl.java:1092)
        at
com.google.inject.internal.ProviderToInternalFactoryAdapter.get(ProviderToInternalFactoryAdapter.java:40)
        at
com.google.inject.internal.SingletonScope$1.get(SingletonScope.java:194)
        at
com.google.inject.internal.InternalFactoryToProviderAdapter.get(InternalFactoryToProviderAdapter.java:41)
        at
com.google.inject.internal.SingleParameterInjector.inject(SingleParameterInjector.java:38)
        at
com.google.inject.internal.SingleParameterInjector.getAll(SingleParameterInjector.java:62)
        at
com.google.inject.internal.ProviderMethod$Factory.provision(ProviderMethod.java:402)
        at
com.google.inject.internal.ProviderMethod$Factory.get(ProviderMethod.java:376)
        at
com.google.inject.internal.ProviderToInternalFactoryAdapter$1.call(ProviderToInternalFactoryAdapter.java:46)
        at
com.google.inject.internal.InjectorImpl.callInContext(InjectorImpl.java:1092)
        at
com.google.inject.internal.ProviderToInternalFactoryAdapter.get(ProviderToInternalFactoryAdapter.java:40)
        at
com.google.inject.internal.SingletonScope$1.get(SingletonScope.java:194)
        at
com.google.inject.internal.InternalFactoryToProviderAdapter.get(InternalFactoryToProviderAdapter.java:41)
        at
com.google.inject.internal.SingleParameterInjector.inject(SingleParameterInjector.java:38)
        at
com.google.inject.internal.SingleParameterInjector.getAll(SingleParameterInjector.java:62)
        at
com.google.inject.internal.ConstructorInjector.provision(ConstructorInjector.java:110)
        at
com.google.inject.internal.ConstructorInjector.construct(ConstructorInjector.java:90)
        at
com.google.inject.internal.ConstructorBindingImpl$Factory.get(ConstructorBindingImpl.java:268)
        at
com.google.inject.internal.InjectorImpl$2$1.call(InjectorImpl.java:1019)
        at
com.google.inject.internal.InjectorImpl.callInContext(InjectorImpl.java:1085)
        at
com.google.inject.internal.InjectorImpl$2.get(InjectorImpl.java:1015)
        at
com.google.inject.internal.InjectorImpl.getInstance(InjectorImpl.java:1054)
        at com.mycompay.source.code.IgniteNode.main(IgniteNode.java:12)
Caused by: class org.apache.ignite.spi.IgniteSpiException: Thread has been
interrupted.
        at
org.apache.ignite.spi.discovery.tcp.ServerImpl.joinTopology(ServerImpl.java:908)
        at
org.apache.ignite.spi.discovery.tcp.ServerImpl.spiStart(ServerImpl.java:360)
        at
org.apache.ignite.spi.discovery.tcp.TcpDiscoverySpi.spiStart(TcpDiscoverySpi.java:1846)
        at
org.apache.ignite.internal.managers.GridManagerAdapter.startSpi(GridManagerAdapter.java:297)
        ... 39 more

Jan 30, 2018 4:38:48 PM org.apache.ignite.logger.java.JavaLogger error
SEVERE: Got exception while starting (will rollback startup routine).
class org.apache.ignite.IgniteCheckedException: Failed to start manager:
GridManagerAdapter [enabled=true,
name=org.apache.ignite.internal.managers.discovery.GridDiscoveryManager]
        at
org.apache.ignite.internal.IgniteKernal.startManager(IgniteKernal.java:1857)
        at
org.apache.ignite.internal.IgniteKernal.start(IgniteKernal.java:1002)
        at
org.apache.ignite.internal.IgnitionEx$IgniteNamedInstance.start0(IgnitionEx.java:1909)
        at
org.apache.ignite.internal.IgnitionEx$IgniteNamedInstance.start(IgnitionEx.java:1652)
        at
org.apache.ignite.internal.IgnitionEx.start0(IgnitionEx.java:1080)
        at
org.apache.ignite.internal.IgnitionEx.startConfigurations(IgnitionEx.java:998)
        at org.apache.ignite.internal.IgnitionEx.start(IgnitionEx.java:884)
        at org.apache.ignite.internal.IgnitionEx.start(IgnitionEx.java:803)
        at org.apache.ignite.Ignition.start(Ignition.java:372)
        at
com.mycompay.source.code.IgniteNodeModule.provideIgniteCluster(IgniteNodeModule.java:24)
        at
com.mycompay.source.code.IgniteNodeModule$$FastClassByGuice$$cc13dccd.invoke(<generated>)
        at
com.google.inject.internal.ProviderMethod$FastClassProviderMethod.doProvision(ProviderMethod.java:264)
        at
com.google.inject.internal.ProviderMethod$Factory.provision(ProviderMethod.java:401)
        at
com.google.inject.internal.ProviderMethod$Factory.get(ProviderMethod.java:376)
        at
com.google.inject.internal.ProviderToInternalFactoryAdapter$1.call(ProviderToInternalFactoryAdapter.java:46)
        at
com.google.inject.internal.InjectorImpl.callInContext(InjectorImpl.java:1092)
        at
com.google.inject.internal.ProviderToInternalFactoryAdapter.get(ProviderToInternalFactoryAdapter.java:40)
        at
com.google.inject.internal.SingletonScope$1.get(SingletonScope.java:194)
        at
com.google.inject.internal.InternalFactoryToProviderAdapter.get(InternalFactoryToProviderAdapter.java:41)
        at
com.google.inject.internal.SingleParameterInjector.inject(SingleParameterInjector.java:38)
        at
com.google.inject.internal.SingleParameterInjector.getAll(SingleParameterInjector.java:62)
        at
com.google.inject.internal.ProviderMethod$Factory.provision(ProviderMethod.java:402)
        at
com.google.inject.internal.ProviderMethod$Factory.get(ProviderMethod.java:376)
        at
com.google.inject.internal.ProviderToInternalFactoryAdapter$1.call(ProviderToInternalFactoryAdapter.java:46)
        at
com.google.inject.internal.InjectorImpl.callInContext(InjectorImpl.java:1092)
        at
com.google.inject.internal.ProviderToInternalFactoryAdapter.get(ProviderToInternalFactoryAdapter.java:40)
        at
com.google.inject.internal.SingletonScope$1.get(SingletonScope.java:194)
        at
com.google.inject.internal.InternalFactoryToProviderAdapter.get(InternalFactoryToProviderAdapter.java:41)
        at
com.google.inject.internal.SingleParameterInjector.inject(SingleParameterInjector.java:38)
        at
com.google.inject.internal.SingleParameterInjector.getAll(SingleParameterInjector.java:62)
        at
com.google.inject.internal.ConstructorInjector.provision(ConstructorInjector.java:110)
        at
com.google.inject.internal.ConstructorInjector.construct(ConstructorInjector.java:90)
        at
com.google.inject.internal.ConstructorBindingImpl$Factory.get(ConstructorBindingImpl.java:268)
        at
com.google.inject.internal.InjectorImpl$2$1.call(InjectorImpl.java:1019)
        at
com.google.inject.internal.InjectorImpl.callInContext(InjectorImpl.java:1085)
        at
com.google.inject.internal.InjectorImpl$2.get(InjectorImpl.java:1015)
        at
com.google.inject.internal.InjectorImpl.getInstance(InjectorImpl.java:1054)
        at com.mycompay.source.code.IgniteNode.main(IgniteNode.java:12)
Caused by: class org.apache.ignite.IgniteCheckedException: Failed to start
SPI: TcpDiscoverySpi [addrRslvr=null, sockTimeout=5000, ackTimeout=5000,
marsh=JdkMarshaller [], reconCnt=10, maxAckTimeout=600000,
forceSrvMode=false, clientReconnectDisabled=false]
        at
org.apache.ignite.internal.managers.GridManagerAdapter.startSpi(GridManagerAdapter.java:300)
        at
org.apache.ignite.internal.managers.discovery.GridDiscoveryManager.start(GridDiscoveryManager.java:882)
        at
org.apache.ignite.internal.IgniteKernal.startManager(IgniteKernal.java:1852)
        ... 37 more
Caused by: class org.apache.ignite.spi.IgniteSpiException: Thread has been
interrupted.
        at
org.apache.ignite.spi.discovery.tcp.ServerImpl.joinTopology(ServerImpl.java:908)
        at
org.apache.ignite.spi.discovery.tcp.ServerImpl.spiStart(ServerImpl.java:360)
        at
org.apache.ignite.spi.discovery.tcp.TcpDiscoverySpi.spiStart(TcpDiscoverySpi.java:1846)
        at
org.apache.ignite.internal.managers.GridManagerAdapter.startSpi(GridManagerAdapter.java:297)
        ... 39 more

[16:38:48] (wrn) Ignoring stopping Ignite instance that was already stopped
or never started: null
[16:38:48] Ignite node stopped OK [uptime=00:00:03.462]
log4j:WARN No appenders could be found for logger
(org.apache.kafka.clients.producer.ProducerConfig).
log4j:WARN Please initialize the log4j system properly.
log4j:WARN See http://logging.apache.org/log4j/1.2/faq.html#noconfig for
more info.
Exception in thread "main" com.google.inject.ProvisionException: Unable to
provision, see the following errors:

1) Error in custom provider, class org.apache.ignite.IgniteException: Failed
to start manager: GridManagerAdapter [enabled=true,
name=org.apache.ignite.internal.managers.discovery.GridDiscoveryManager]
  at
com.mycompay.source.code.IgniteNodeModule.provideIgniteCluster(IgniteNodeModule.java:24)
  at
com.mycompay.source.code.IgniteNodeModule.provideIgniteCluster(IgniteNodeModule.java:24)
  while locating org.apache.ignite.Ignite
    for the 1st parameter of
com.mycompay.source.code.IgniteNodeModule.provideIgniteEvents(IgniteNodeModule.java:35)
  at
com.mycompay.source.code.IgniteNodeModule.provideIgniteEvents(IgniteNodeModule.java:35)
  while locating org.apache.ignite.IgniteEvents
    for the 1st parameter of
com.mycompay.source.code.KafkaMessageSenderService.<init>(KafkaMessageSenderService.java:45)
  while locating com.mycompay.source.code.KafkaMessageSenderService

1 error
        at
com.google.inject.internal.InjectorImpl$2.get(InjectorImpl.java:1028)
        at
com.google.inject.internal.InjectorImpl.getInstance(InjectorImpl.java:1054)
        at com.mycompay.source.code.IgniteNode.main(IgniteNode.java:12)
Caused by: class org.apache.ignite.IgniteException: Failed to start manager:
GridManagerAdapter [enabled=true,
name=org.apache.ignite.internal.managers.discovery.GridDiscoveryManager]
        at
org.apache.ignite.internal.util.IgniteUtils.convertException(IgniteUtils.java:966)
        at org.apache.ignite.Ignition.start(Ignition.java:375)
        at
com.mycompay.source.code.IgniteNodeModule.provideIgniteCluster(IgniteNodeModule.java:24)
        at
com.mycompay.source.code.IgniteNodeModule$$FastClassByGuice$$cc13dccd.invoke(<generated>)
        at
com.google.inject.internal.ProviderMethod$FastClassProviderMethod.doProvision(ProviderMethod.java:264)
        at
com.google.inject.internal.ProviderMethod$Factory.provision(ProviderMethod.java:401)
        at
com.google.inject.internal.ProviderMethod$Factory.get(ProviderMethod.java:376)
        at
com.google.inject.internal.ProviderToInternalFactoryAdapter$1.call(ProviderToInternalFactoryAdapter.java:46)
        at
com.google.inject.internal.InjectorImpl.callInContext(InjectorImpl.java:1092)
        at
com.google.inject.internal.ProviderToInternalFactoryAdapter.get(ProviderToInternalFactoryAdapter.java:40)
        at
com.google.inject.internal.SingletonScope$1.get(SingletonScope.java:194)
        at
com.google.inject.internal.InternalFactoryToProviderAdapter.get(InternalFactoryToProviderAdapter.java:41)
        at
com.google.inject.internal.SingleParameterInjector.inject(SingleParameterInjector.java:38)
        at
com.google.inject.internal.SingleParameterInjector.getAll(SingleParameterInjector.java:62)
        at
com.google.inject.internal.ProviderMethod$Factory.provision(ProviderMethod.java:402)
        at
com.google.inject.internal.ProviderMethod$Factory.get(ProviderMethod.java:376)
        at
com.google.inject.internal.ProviderToInternalFactoryAdapter$1.call(ProviderToInternalFactoryAdapter.java:46)
        at
com.google.inject.internal.InjectorImpl.callInContext(InjectorImpl.java:1092)
        at
com.google.inject.internal.ProviderToInternalFactoryAdapter.get(ProviderToInternalFactoryAdapter.java:40)
        at
com.google.inject.internal.SingletonScope$1.get(SingletonScope.java:194)
        at
com.google.inject.internal.InternalFactoryToProviderAdapter.get(InternalFactoryToProviderAdapter.java:41)
        at
com.google.inject.internal.SingleParameterInjector.inject(SingleParameterInjector.java:38)
        at
com.google.inject.internal.SingleParameterInjector.getAll(SingleParameterInjector.java:62)
        at
com.google.inject.internal.ConstructorInjector.provision(ConstructorInjector.java:110)
        at
com.google.inject.internal.ConstructorInjector.construct(ConstructorInjector.java:90)
        at
com.google.inject.internal.ConstructorBindingImpl$Factory.get(ConstructorBindingImpl.java:268)
        at
com.google.inject.internal.InjectorImpl$2$1.call(InjectorImpl.java:1019)
        at
com.google.inject.internal.InjectorImpl.callInContext(InjectorImpl.java:1085)
        at
com.google.inject.internal.InjectorImpl$2.get(InjectorImpl.java:1015)
        ... 2 more
Caused by: class org.apache.ignite.IgniteCheckedException: Failed to start
manager: GridManagerAdapter [enabled=true,
name=org.apache.ignite.internal.managers.discovery.GridDiscoveryManager]
        at
org.apache.ignite.internal.IgniteKernal.startManager(IgniteKernal.java:1857)
        at
org.apache.ignite.internal.IgniteKernal.start(IgniteKernal.java:1002)
        at
org.apache.ignite.internal.IgnitionEx$IgniteNamedInstance.start0(IgnitionEx.java:1909)
        at
org.apache.ignite.internal.IgnitionEx$IgniteNamedInstance.start(IgnitionEx.java:1652)
        at
org.apache.ignite.internal.IgnitionEx.start0(IgnitionEx.java:1080)
        at
org.apache.ignite.internal.IgnitionEx.startConfigurations(IgnitionEx.java:998)
        at org.apache.ignite.internal.IgnitionEx.start(IgnitionEx.java:884)
        at org.apache.ignite.internal.IgnitionEx.start(IgnitionEx.java:803)
        at org.apache.ignite.Ignition.start(Ignition.java:372)
        ... 29 more
Caused by: class org.apache.ignite.IgniteCheckedException: Failed to start
SPI: TcpDiscoverySpi [addrRslvr=null, sockTimeout=5000, ackTimeout=5000,
marsh=JdkMarshaller [], reconCnt=10, maxAckTimeout=600000,
forceSrvMode=false, clientReconnectDisabled=false]
        at
org.apache.ignite.internal.managers.GridManagerAdapter.startSpi(GridManagerAdapter.java:300)
        at
org.apache.ignite.internal.managers.discovery.GridDiscoveryManager.start(GridDiscoveryManager.java:882)
        at
org.apache.ignite.internal.IgniteKernal.startManager(IgniteKernal.java:1852)
        ... 37 more
Caused by: class org.apache.ignite.spi.IgniteSpiException: Thread has been
interrupted.
        at
org.apache.ignite.spi.discovery.tcp.ServerImpl.joinTopology(ServerImpl.java:908)
        at
org.apache.ignite.spi.discovery.tcp.ServerImpl.spiStart(ServerImpl.java:360)
        at
org.apache.ignite.spi.discovery.tcp.TcpDiscoverySpi.spiStart(TcpDiscoverySpi.java:1846)
        at
org.apache.ignite.internal.managers.GridManagerAdapter.startSpi(GridManagerAdapter.java:297)




--
Sent from: http://apache-ignite-users.70518.x6.nabble.com/

Re: [Ignite 2.0.0] Stopping the node in order to prevent cluster wide instability.

Posted by Denis Magda <dm...@apache.org>.
Looking at the source code on the master branch, I couldn’t figure out how this NPE can happen. Please upgrade to 2.3.0 and let us know if you still observe the bug.

—
Denis

> On Nov 12, 2017, at 4:27 AM, dark <ek...@gmail.com> wrote:
> 
> Another Ignite node logs here. 
> 
> Nodes are currently under GC for less than a second.
> 
> [19:23:31,416][ERROR][tcp-disco-msg-worker-#2%null%][TcpDiscoverySpi]
> TcpDiscoverSpi's message worker thread failed abnormally. Stopping the node
> in order to prevent cluster wide instability.
> java.lang.NullPointerException
>        at
> org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtCacheAdapter.sizeLong(GridDhtCacheAdapter.java:580)
>        at
> org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtCacheAdapter.size(GridDhtCacheAdapter.java:573)
>        at
> org.apache.ignite.internal.processors.cache.CacheMetricsImpl.getSize(CacheMetricsImpl.java:226)
>        at
> org.apache.ignite.internal.processors.cache.CacheMetricsSnapshot.<init>(CacheMetricsSnapshot.java:250)
>        at
> org.apache.ignite.internal.processors.cache.GridCacheAdapter.localMetrics(GridCacheAdapter.java:3211)
>        at
> org.apache.ignite.internal.managers.discovery.GridDiscoveryManager$7.cacheMetrics(GridDiscoveryManager.java:975)
>        at
> org.apache.ignite.spi.discovery.tcp.ServerImpl$RingMessageWorker.processMetricsUpdateMessage(ServerImpl.java:4947)
>        at
> org.apache.ignite.spi.discovery.tcp.ServerImpl$RingMessageWorker.processMessage(ServerImpl.java:2562)
>        at
> org.apache.ignite.spi.discovery.tcp.ServerImpl$RingMessageWorker.processMessage(ServerImpl.java:2364)
>        at
> org.apache.ignite.spi.discovery.tcp.ServerImpl$MessageWorkerAdapter.body(ServerImpl.java:6460)
>        at
> org.apache.ignite.spi.discovery.tcp.ServerImpl$RingMessageWorker.body(ServerImpl.java:2450)
>        at
> org.apache.ignite.spi.IgniteSpiThread.run(IgniteSpiThread.java:62)
> [19:23:31,444][ERROR][tcp-disco-msg-worker-#2%null%][TcpDiscoverySpi]
> Runtime error caught during grid runnable execution: IgniteSpiThread
> [name=tcp-disco-msg-worker-#2%null%]
> java.lang.NullPointerException
>        at
> org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtCacheAdapter.sizeLong(GridDhtCacheAdapter.java:580)
>        at
> org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtCacheAdapter.size(GridDhtCacheAdapter.java:573)
>        at
> org.apache.ignite.internal.processors.cache.CacheMetricsImpl.getSize(CacheMetricsImpl.java:226)
>        at
> org.apache.ignite.internal.processors.cache.CacheMetricsSnapshot.<init>(CacheMetricsSnapshot.java:250)
>        at
> org.apache.ignite.internal.processors.cache.GridCacheAdapter.localMetrics(GridCacheAdapter.java:3211)
>        at
> org.apache.ignite.internal.managers.discovery.GridDiscoveryManager$7.cacheMetrics(GridDiscoveryManager.java:975)
>        at
> org.apache.ignite.spi.discovery.tcp.ServerImpl$RingMessageWorker.processMetricsUpdateMessage(ServerImpl.java:4947)
>        at
> org.apache.ignite.spi.discovery.tcp.ServerImpl$RingMessageWorker.processMessage(ServerImpl.java:2562)
>        at
> org.apache.ignite.spi.discovery.tcp.ServerImpl$RingMessageWorker.processMessage(ServerImpl.java:2364)
>        at
> org.apache.ignite.spi.discovery.tcp.ServerImpl$MessageWorkerAdapter.body(ServerImpl.java:6460)
>        at
> org.apache.ignite.spi.discovery.tcp.ServerImpl$RingMessageWorker.body(ServerImpl.java:2450)
>        at
> org.apache.ignite.spi.IgniteSpiThread.run(IgniteSpiThread.java:62)
> [19:23:31,470][INFO ][node-stop-thread][GridTcpRestProtocol] Command
> protocol successfully stopped: TCP binary
> 
> 
> 
> --
> Sent from: http://apache-ignite-users.70518.x6.nabble.com/


Re: [Ignite 2.0.0] Stopping the node in order to prevent cluster wide instability.

Posted by dark <ek...@gmail.com>.
Here are the logs from another Ignite node.

GC pauses on the nodes currently last less than a second.

[19:23:31,416][ERROR][tcp-disco-msg-worker-#2%null%][TcpDiscoverySpi]
TcpDiscoverSpi's message worker thread failed abnormally. Stopping the node
in order to prevent cluster wide instability.
java.lang.NullPointerException
        at
org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtCacheAdapter.sizeLong(GridDhtCacheAdapter.java:580)
        at
org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtCacheAdapter.size(GridDhtCacheAdapter.java:573)
        at
org.apache.ignite.internal.processors.cache.CacheMetricsImpl.getSize(CacheMetricsImpl.java:226)
        at
org.apache.ignite.internal.processors.cache.CacheMetricsSnapshot.<init>(CacheMetricsSnapshot.java:250)
        at
org.apache.ignite.internal.processors.cache.GridCacheAdapter.localMetrics(GridCacheAdapter.java:3211)
        at
org.apache.ignite.internal.managers.discovery.GridDiscoveryManager$7.cacheMetrics(GridDiscoveryManager.java:975)
        at
org.apache.ignite.spi.discovery.tcp.ServerImpl$RingMessageWorker.processMetricsUpdateMessage(ServerImpl.java:4947)
        at
org.apache.ignite.spi.discovery.tcp.ServerImpl$RingMessageWorker.processMessage(ServerImpl.java:2562)
        at
org.apache.ignite.spi.discovery.tcp.ServerImpl$RingMessageWorker.processMessage(ServerImpl.java:2364)
        at
org.apache.ignite.spi.discovery.tcp.ServerImpl$MessageWorkerAdapter.body(ServerImpl.java:6460)
        at
org.apache.ignite.spi.discovery.tcp.ServerImpl$RingMessageWorker.body(ServerImpl.java:2450)
        at
org.apache.ignite.spi.IgniteSpiThread.run(IgniteSpiThread.java:62)
[19:23:31,444][ERROR][tcp-disco-msg-worker-#2%null%][TcpDiscoverySpi]
Runtime error caught during grid runnable execution: IgniteSpiThread
[name=tcp-disco-msg-worker-#2%null%]
java.lang.NullPointerException
        at
org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtCacheAdapter.sizeLong(GridDhtCacheAdapter.java:580)
        at
org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtCacheAdapter.size(GridDhtCacheAdapter.java:573)
        at
org.apache.ignite.internal.processors.cache.CacheMetricsImpl.getSize(CacheMetricsImpl.java:226)
        at
org.apache.ignite.internal.processors.cache.CacheMetricsSnapshot.<init>(CacheMetricsSnapshot.java:250)
        at
org.apache.ignite.internal.processors.cache.GridCacheAdapter.localMetrics(GridCacheAdapter.java:3211)
        at
org.apache.ignite.internal.managers.discovery.GridDiscoveryManager$7.cacheMetrics(GridDiscoveryManager.java:975)
        at
org.apache.ignite.spi.discovery.tcp.ServerImpl$RingMessageWorker.processMetricsUpdateMessage(ServerImpl.java:4947)
        at
org.apache.ignite.spi.discovery.tcp.ServerImpl$RingMessageWorker.processMessage(ServerImpl.java:2562)
        at
org.apache.ignite.spi.discovery.tcp.ServerImpl$RingMessageWorker.processMessage(ServerImpl.java:2364)
        at
org.apache.ignite.spi.discovery.tcp.ServerImpl$MessageWorkerAdapter.body(ServerImpl.java:6460)
        at
org.apache.ignite.spi.discovery.tcp.ServerImpl$RingMessageWorker.body(ServerImpl.java:2450)
        at
org.apache.ignite.spi.IgniteSpiThread.run(IgniteSpiThread.java:62)
[19:23:31,470][INFO ][node-stop-thread][GridTcpRestProtocol] Command
protocol successfully stopped: TCP binary



--
Sent from: http://apache-ignite-users.70518.x6.nabble.com/