You are viewing a plain text version of this content. The canonical link for it is here.
Posted to user@ignite.apache.org by "yonggu.lee" <yo...@navercorp.com> on 2018/04/30 09:01:37 UTC

ignite cluster cannot be activated - Failed to restore from a checkpoint

Our Ignite cluster is stuck in an inactive state and cannot be restored from a
checkpoint.

When the cluster is activated, the following exception occurs:

[17:40:54,750][INFO][exchange-worker-#122][GridCacheDatabaseSharedManager]
Read checkpoint status
[startMarker=/naver/ignite_storage/20180330/storage/node00-698bff11-10c4-4fa9-87bf-07f22714951e/cp/1525070153790-cd46119a-51cd-49af-9ffa-0dccca84fb20-START.bin,
endMarker=/naver/ignite_storage/20180330/storage/node00-698bff11-10c4-4fa9-87bf-07f22714951e/cp/1525070153790-cd46119a-51cd-49af-9ffa-0dccca84fb20-END.bin]
[17:40:54,750][INFO][exchange-worker-#122][GridCacheDatabaseSharedManager]
Applying lost cache updates since last checkpoint record
[lastMarked=FileWALPointer [idx=106922, fileOffset=3457606, len=299101,
forceFlush=false], lastCheckpointId=cd46119a-51cd-49af-9ffa-0dccca84fb20]
[17:40:54,818][SEVERE][exchange-worker-#122][GridDhtPartitionsExchangeFuture]
Failed to reinitialize local partitions (preloading will be stopped):
GridDhtPartitionExchangeId [topVer=AffinityTopologyVersion [topVer=12,
minorTopVer=1], discoEvt=DiscoveryCustomEvent
[customMsg=ChangeGlobalStateMessage
[id=9a375b51361-acca12ae-d9fb-4e21-a282-3bc7af575257,
reqId=b3985722-b063-4e5a-831e-9f84d656df96,
initiatingNodeId=c6e1394e-bf7a-4fe4-a1bf-f64193bd44f4, activate=true],
affTopVer=AffinityTopologyVersion [topVer=12, minorTopVer=1],
super=DiscoveryEvent [evtNode=TcpDiscoveryNode
[id=c6e1394e-bf7a-4fe4-a1bf-f64193bd44f4, addrs=[10.116.24.222, 10.244.5.0,
127.0.0.1, 172.17.0.1, 192.168.193.192], sockAddrs=[/10.244.5.0:47500,
/172.17.0.1:47500, /192.168.193.192:47500, /127.0.0.1:47500,
/10.116.24.222:47500], discPort=47500, order=3, intOrder=3,
lastExchangeTime=1525077608394, loc=false, ver=2.3.0#20171220-sha1:8431829c,
isClient=false], topVer=12, nodeId8=e8f4c909, msg=null,
type=DISCOVERY_CUSTOM_EVT, tstamp=1525077647980]], nodeId=c6e1394e,
evt=DISCOVERY_CUSTOM_EVT]
java.lang.IndexOutOfBoundsException: index 890
        at
java.util.concurrent.atomic.AtomicReferenceArray.checkedByteOffset(AtomicReferenceArray.java:78)
        at
java.util.concurrent.atomic.AtomicReferenceArray.get(AtomicReferenceArray.java:125)
        at
org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtPartitionTopologyImpl.forceCreatePartition(GridDhtPartitionTopologyImpl.java:767)
        at
org.apache.ignite.internal.processors.cache.persistence.GridCacheDatabaseSharedManager.applyUpdate(GridCacheDatabaseSharedManager.java:1777)
        at
org.apache.ignite.internal.processors.cache.persistence.GridCacheDatabaseSharedManager.applyLastUpdates(GridCacheDatabaseSharedManager.java:1637)
        at
org.apache.ignite.internal.processors.cache.persistence.GridCacheDatabaseSharedManager.restoreState(GridCacheDatabaseSharedManager.java:1072)
        at
org.apache.ignite.internal.processors.cache.persistence.GridCacheDatabaseSharedManager.beforeExchange(GridCacheDatabaseSharedManager.java:863)
        at
org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionsExchangeFuture.distributedExchange(GridDhtPartitionsExchangeFuture.java:1019)
        at
org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionsExchangeFuture.init(GridDhtPartitionsExchangeFuture.java:651)
        at
org.apache.ignite.internal.processors.cache.GridCachePartitionExchangeManager$ExchangeWorker.body(GridCachePartitionExchangeManager.java:2279)
        at
org.apache.ignite.internal.util.worker.GridWorker.run(GridWorker.java:110)
        at java.lang.Thread.run(Thread.java:745)
[17:40:54,818][INFO][exchange-worker-#122][GridDhtPartitionsExchangeFuture]
Finish exchange future [startVer=AffinityTopologyVersion [topVer=12,
minorTopVer=1], resVer=null, err=java.lang.IndexOutOfBoundsException: index
890]
[17:40:54,830][SEVERE][exchange-worker-#122][GridCachePartitionExchangeManager]
Failed to wait for completion of partition map exchange (preloading will not
start): GridDhtPartitionsExchangeFuture [firstDiscoEvt=DiscoveryCustomEvent
[customMsg=null, affTopVer=AffinityTopologyVersion [topVer=12,
minorTopVer=1], super=DiscoveryEvent [evtNode=TcpDiscoveryNode
[id=c6e1394e-bf7a-4fe4-a1bf-f64193bd44f4, addrs=[10.116.24.222, 10.244.5.0,
127.0.0.1, 172.17.0.1, 192.168.193.192], sockAddrs=[/10.244.5.0:47500,
/172.17.0.1:47500, /192.168.193.192:47500, /127.0.0.1:47500,
/10.116.24.222:47500], discPort=47500, order=3, intOrder=3,
lastExchangeTime=1525077608394, loc=false, ver=2.3.0#20171220-sha1:8431829c,
isClient=false], topVer=12, nodeId8=e8f4c909, msg=null,
type=DISCOVERY_CUSTOM_EVT, tstamp=1525077647980]], crd=TcpDiscoveryNode
[id=8e65440a-df65-4770-9a7b-26672bd574a3, addrs=[10.116.25.32, 10.244.6.0,
127.0.0.1, 172.17.0.1, 192.168.82.128], sockAddrs=[/10.244.6.0:47500,
/10.116.25.32:47500, /172.17.0.1:47500, /192.168.82.128:47500,
/127.0.0.1:47500], discPort=47500, order=1, intOrder=1,
lastExchangeTime=1525077608394, loc=false, ver=2.3.0#20171220-sha1:8431829c,
isClient=false], exchId=GridDhtPartitionExchangeId
[topVer=AffinityTopologyVersion [topVer=12, minorTopVer=1],
discoEvt=DiscoveryCustomEvent [customMsg=null,
affTopVer=AffinityTopologyVersion [topVer=12, minorTopVer=1],
super=DiscoveryEvent [evtNode=TcpDiscoveryNode
[id=c6e1394e-bf7a-4fe4-a1bf-f64193bd44f4, addrs=[10.116.24.222, 10.244.5.0,
127.0.0.1, 172.17.0.1, 192.168.193.192], sockAddrs=[/10.244.5.0:47500,
/172.17.0.1:47500, /192.168.193.192:47500, /127.0.0.1:47500,
/10.116.24.222:47500], discPort=47500, order=3, intOrder=3,
lastExchangeTime=1525077608394, loc=false, ver=2.3.0#20171220-sha1:8431829c,
isClient=false], topVer=12, nodeId8=e8f4c909, msg=null,
type=DISCOVERY_CUSTOM_EVT, tstamp=1525077647980]], nodeId=c6e1394e,
evt=DISCOVERY_CUSTOM_EVT], added=true, initFut=GridFutureAdapter
[ignoreInterrupts=false, state=DONE, res=false, hash=989374705], init=false,
lastVer=null, partReleaseFut=PartitionReleaseFuture
[topVer=AffinityTopologyVersion [topVer=12, minorTopVer=1],
futures=[ExplicitLockReleaseFuture [topVer=AffinityTopologyVersion
[topVer=12, minorTopVer=1], futures=[]], TxReleaseFuture
[topVer=AffinityTopologyVersion [topVer=12, minorTopVer=1], futures=[]],
AtomicUpdateReleaseFuture [topVer=AffinityTopologyVersion [topVer=12,
minorTopVer=1], futures=[]], DataStreamerReleaseFuture
[topVer=AffinityTopologyVersion [topVer=12, minorTopVer=1], futures=[]]]],
exchActions=null, affChangeMsg=null, initTs=1525077647990,
centralizedAff=false, changeGlobalStateE=null, done=true, state=SRV,
evtLatch=0, remaining=[8e65440a-df65-4770-9a7b-26672bd574a3,
18184b4a-0fe0-4fff-a917-a3b03f16a509, c6e1394e-bf7a-4fe4-a1bf-f64193bd44f4,
491c9af5-e855-42d8-b617-e72bf3099a46, 4aae4b1e-6ef4-43ac-b156-f5445adb40c6,
91d8036d-c74f-48d7-b389-82ebba96adf2, 95e77e2f-ba25-4c12-b9b0-d1b21386eb36,
c12d30c3-bf9e-4c58-9468-8ef878ec2679, 1edbfd89-a03f-4fea-93b9-d058eb93f66b],
super=GridFutureAdapter [ignoreInterrupts=false, state=DONE,
res=java.lang.IndexOutOfBoundsException: index 890, hash=328088520]]
class org.apache.ignite.IgniteCheckedException: index 890
        at
org.apache.ignite.internal.util.IgniteUtils.cast(IgniteUtils.java:7252)
        at
org.apache.ignite.internal.util.future.GridFutureAdapter.resolve(GridFutureAdapter.java:259)
        at
org.apache.ignite.internal.util.future.GridFutureAdapter.get0(GridFutureAdapter.java:207)
        at
org.apache.ignite.internal.util.future.GridFutureAdapter.get(GridFutureAdapter.java:159)
        at
org.apache.ignite.internal.processors.cache.GridCachePartitionExchangeManager$ExchangeWorker.body(GridCachePartitionExchangeManager.java:2289)
        at
org.apache.ignite.internal.util.worker.GridWorker.run(GridWorker.java:110)
        at java.lang.Thread.run(Thread.java:745)
Caused by: java.lang.IndexOutOfBoundsException: index 890
        at
java.util.concurrent.atomic.AtomicReferenceArray.checkedByteOffset(AtomicReferenceArray.java:78)
        at
java.util.concurrent.atomic.AtomicReferenceArray.get(AtomicReferenceArray.java:125)
        at
org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtPartitionTopologyImpl.forceCreatePartition(GridDhtPartitionTopologyImpl.java:767)
        at
org.apache.ignite.internal.processors.cache.persistence.GridCacheDatabaseSharedManager.applyUpdate(GridCacheDatabaseSharedManager.java:1777)
        at
org.apache.ignite.internal.processors.cache.persistence.GridCacheDatabaseSharedManager.applyLastUpdates(GridCacheDatabaseSharedManager.java:1637)
        at
org.apache.ignite.internal.processors.cache.persistence.GridCacheDatabaseSharedManager.restoreState(GridCacheDatabaseSharedManager.java:1072)
        at
org.apache.ignite.internal.processors.cache.persistence.GridCacheDatabaseSharedManager.beforeExchange(GridCacheDatabaseSharedManager.java:863)
        at
org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionsExchangeFuture.distributedExchange(GridDhtPartitionsExchangeFuture.java:1019)
        at
org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionsExchangeFuture.init(GridDhtPartitionsExchangeFuture.java:651)
        at
org.apache.ignite.internal.processors.cache.GridCachePartitionExchangeManager$ExchangeWorker.body(GridCachePartitionExchangeManager.java:2279)
        ... 2 more

And, I cannot do any operations.

This symptom started to show when I cancelled (Ctrl+C) a service deployment.
At that time, another job was writing to a cache. I just changed the sticky
parameter of a service deployment (from false to true), and the deployment
was too slow, so I cancelled it. And then I restarted the cluster, and the
problem began.

Is there any solution or workaround for this error, such as skipping the
checkpoint restore process? It is OK for me to lose some recent
cache updates.

Ignite version is 2.3.0 and config is as follows.

<?xml version="1.0" encoding="UTF-8"?>



<beans xmlns="http://www.springframework.org/schema/beans"
       xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
       xsi:schemaLocation="
       http://www.springframework.org/schema/beans
       http://www.springframework.org/schema/beans/spring-beans.xsd">
    <bean id="grid.cfg"
class="org.apache.ignite.configuration.IgniteConfiguration">

        
        <property name="serviceThreadPoolSize" value="80"/>

        <property name="failureDetectionTimeout" value="3600000"/>

        
        <property name="cacheConfiguration">
            <list>
                
                <bean
class="org.apache.ignite.configuration.CacheConfiguration">
                    <property name="name"
value="valid_dup_ratio_cache_name"/> 
                    <property name="atomicityMode" value="ATOMIC"/>
                    <property name="cacheMode" value="REPLICATED"/>
                    <property name="indexedTypes">
                        <list>
                            <value>java.lang.String</value>
                            <value>java.util.LinkedList</value>
                        </list>
                    </property>
                </bean>

                
                <bean
class="org.apache.ignite.configuration.CacheConfiguration">
                    <property name="name"
value="dup_ratio_hbase_read_through"/>
                    <property name="atomicityMode" value="ATOMIC"/>
                    <property name="cacheMode" value="PARTITIONED"/>
                    <property name="onheapCacheEnabled" value="true"/>
                    <property name="evictionPolicy">
                        <bean
class="org.apache.ignite.cache.eviction.lru.LruEvictionPolicy">
                            <property name="batchSize" value="5"/>
                            
                        </bean>
                    </property>

                    <property name="expiryPolicyFactory">
                        <bean id="expiryPolicy"
class="javax.cache.expiry.CreatedExpiryPolicy" factory-method="factoryOf">
                            <constructor-arg>
                                <bean class="javax.cache.expiry.Duration">
                                    <constructor-arg value="HOURS"/>
                                    <constructor-arg value="24"/>
                                </bean>
                            </constructor-arg>
                        </bean>
                    </property>

                    <property name="cacheStoreFactory">
                        <bean
class="javax.cache.configuration.FactoryBuilder" factory-method="factoryOf">
                            <constructor-arg
value="com.naver.kweb.serp.title.ignite.read_through.HBaseDupRatioAdapter"/>
                        </bean>
                    </property>
                    <property name="readThrough" value="true"/>
                    <property name="writeThrough" value="false"/>
                </bean>
            </list>
        </property>

        
        <property name="discoverySpi">
            <bean
class="org.apache.ignite.spi.discovery.tcp.TcpDiscoverySpi">
                <property name="clientReconnectDisabled" value="false"/>
                <property name="networkTimeout" value="120000"/>
                <property name="ipFinder">
                    
                    <bean
class="org.apache.ignite.spi.discovery.tcp.ipfinder.vm.TcpDiscoveryVmIpFinder">
                        <property name="addresses">
                            <list>
                                
                               
<value>csb7x0876.nfra.io:47500..47509</value>
                               
<value>csb7x0877.nfra.io:47500..47509</value>
                               
<value>csb7x0878.nfra.io:47500..47509</value>
                               
<value>csb7x0879.nfra.io:47500..47509</value>
                               
<value>csb7x0880.nfra.io:47500..47509</value>
                               
<value>csb7x0881.nfra.io:47500..47509</value>
                               
<value>csb7x0882.nfra.io:47500..47509</value>
                               
<value>csb7x0883.nfra.io:47500..47509</value>
                               
<value>csb7x0884.nfra.io:47500..47509</value>
                               
<value>csb7x0885.nfra.io:47500..47509</value>
                            </list>
                        </property>
                    </bean>
                </property>
            </bean>
        </property>

        
        <property name="dataStorageConfiguration">
            <bean
class="org.apache.ignite.configuration.DataStorageConfiguration">
                 

                <property name="writeThrottlingEnabled" value="true"/> 

                <property name="defaultDataRegionConfiguration">
                    <bean
class="org.apache.ignite.configuration.DataRegionConfiguration">
                        <property name="persistenceEnabled" value="true"/> 
                        <property name="name" value="Default_Region"/>
                        <property name="maxSize" value="#{75L * 1024 * 1024
* 1024}"/> 
                        <property name="checkpointPageBufferSize"
value="#{1024L * 1024 * 1024}"/> 
                        <property name="metricsEnabled" value="true"/>
                    </bean>
                </property>
                <property name="storagePath"
value="/naver/ignite_storage/20180330/storage"/>
                <property name="walPath"
value="/naver/ignite_storage/20180330/wal"/>
                <property name="walArchivePath"
value="/naver/ignite_storage/20180330/walArchive"/>
                <property name="metricsEnabled" value="true"/>
            </bean>
        </property>

        
        <property name="binaryConfiguration">
            <bean
class="org.apache.ignite.configuration.BinaryConfiguration">
                <property name="typeConfigurations">
                    <list>
                        <bean
class="org.apache.ignite.binary.BinaryTypeConfiguration">
                            <property name="typeName"
value="com.naver.kweb.serp.title.ignite.service.TitleMakerServiceImpl"/>
                        </bean>
                    </list>
                </property>
            </bean>
        </property>
    </bean>
</beans>

Thanks.



--
Sent from: http://apache-ignite-users.70518.x6.nabble.com/

Re: ignite cluster cannot be activated - Failed to restore from a checkpoint

Posted by Pavel Vinokurov <vi...@gmail.com>.
I mean you could try cleaning up walPath and walArchivePath to restore the
last checkpoint without applying the WAL.

2018-04-30 12:50 GMT+03:00 yonggu.lee <yo...@navercorp.com>:

> In our configuration, *work/* directory is always deleted when the cluster
> restarts because we are using docker & kubernetes for cluster managing and
> not set "workDirectory" property to a persistent path. So there is no
> *work/db/wal* also, but the error occurs.
>
> And, as a other topic, the *work/* directory should be located in a
> persistent path? Our current config, not storing work directory, is wrong?
> In other words, the "workDirectory" config property should be set to a
> persistent one like,
>
> <property name="workDirectory" value="/naver/ignite_storage/
> 20180330/work"/>
>
> as well as storagePath, walPath, walArchivePath?
>
> Thanks in advance.
>
>
>
> --
> Sent from: http://apache-ignite-users.70518.x6.nabble.com/
>



-- 

Regards

Pavel Vinokurov

Re: ignite cluster cannot be activated - Failed to restore from a checkpoint

Posted by "yonggu.lee" <yo...@navercorp.com>.
In our configuration, the *work/* directory is always deleted when the cluster
restarts, because we are using Docker & Kubernetes for cluster management and
have not set the "workDirectory" property to a persistent path. So there is no
*work/db/wal* either, but the error still occurs.

And, as a separate topic, should the *work/* directory be located on a
persistent path? Is our current config, which does not persist the work
directory, wrong? In other words, should the "workDirectory" config property
be set to a persistent one like,

<property name="workDirectory" value="/naver/ignite_storage/20180330/work"/>

as well as storagePath, walPath, walArchivePath?

Thanks in advance.



--
Sent from: http://apache-ignite-users.70518.x6.nabble.com/

Re: ignite cluster cannot be activated - Failed to restore from a checkpoint

Posted by Pavel Vinokurov <vi...@gmail.com>.
Hi,

You could remove the folder *work/db/wal* and restart the cluster, but
first back up the *work/* directory fully.
The workaround is to not apply the last changes from the WAL and just load the
last checkpoint.

2018-04-30 12:01 GMT+03:00 yonggu.lee <yo...@navercorp.com>:

> Our ignite cluster stuck in an inactive state, cannot be restored from a
> checkpoint.
>
> When cluster is activated, the following exception occurs,
>
> [17:40:54,750][INFO][exchange-worker-#122][GridCacheDatabaseSharedManager]
> Read checkpoint status
> [startMarker=/naver/ignite_storage/20180330/storage/
> node00-698bff11-10c4-4fa9-87bf-07f22714951e/cp/
> 1525070153790-cd46119a-51cd-49af-9ffa-0dccca84fb20-START.bin,
> endMarker=/naver/ignite_storage/20180330/storage/
> node00-698bff11-10c4-4fa9-87bf-07f22714951e/cp/
> 1525070153790-cd46119a-51cd-49af-9ffa-0dccca84fb20-END.bin]
> [17:40:54,750][INFO][exchange-worker-#122][GridCacheDatabaseSharedManager]
> Applying lost cache updates since last checkpoint record
> [lastMarked=FileWALPointer [idx=106922, fileOffset=3457606, len=299101,
> forceFlush=false], lastCheckpointId=cd46119a-51cd-49af-9ffa-0dccca84fb20]
> [17:40:54,818][SEVERE][exchange-worker-#122][
> GridDhtPartitionsExchangeFuture]
> Failed to reinitialize local partitions (preloading will be stopped):
> GridDhtPartitionExchangeId [topVer=AffinityTopologyVersion [topVer=12,
> minorTopVer=1], discoEvt=DiscoveryCustomEvent
> [customMsg=ChangeGlobalStateMessage
> [id=9a375b51361-acca12ae-d9fb-4e21-a282-3bc7af575257,
> reqId=b3985722-b063-4e5a-831e-9f84d656df96,
> initiatingNodeId=c6e1394e-bf7a-4fe4-a1bf-f64193bd44f4, activate=true],
> affTopVer=AffinityTopologyVersion [topVer=12, minorTopVer=1],
> super=DiscoveryEvent [evtNode=TcpDiscoveryNode
> [id=c6e1394e-bf7a-4fe4-a1bf-f64193bd44f4, addrs=[10.116.24.222,
> 10.244.5.0,
> 127.0.0.1, 172.17.0.1, 192.168.193.192], sockAddrs=[/10.244.5.0:47500,
> /172.17.0.1:47500, /192.168.193.192:47500, /127.0.0.1:47500,
> /10.116.24.222:47500], discPort=47500, order=3, intOrder=3,
> lastExchangeTime=1525077608394, loc=false, ver=2.3.0#20171220-sha1:
> 8431829c,
> isClient=false], topVer=12, nodeId8=e8f4c909, msg=null,
> type=DISCOVERY_CUSTOM_EVT, tstamp=1525077647980]], nodeId=c6e1394e,
> evt=DISCOVERY_CUSTOM_EVT]
> java.lang.IndexOutOfBoundsException: index 890
>         at
> java.util.concurrent.atomic.AtomicReferenceArray.checkedByteOffset(
> AtomicReferenceArray.java:78)
>         at
> java.util.concurrent.atomic.AtomicReferenceArray.get(
> AtomicReferenceArray.java:125)
>         at
> org.apache.ignite.internal.processors.cache.distributed.dht.
> GridDhtPartitionTopologyImpl.forceCreatePartition(
> GridDhtPartitionTopologyImpl.java:767)
>         at
> org.apache.ignite.internal.processors.cache.persistence.
> GridCacheDatabaseSharedManager.applyUpdate(GridCacheDatabaseSharedManager
> .java:1777)
>         at
> org.apache.ignite.internal.processors.cache.persistence.
> GridCacheDatabaseSharedManager.applyLastUpdates(
> GridCacheDatabaseSharedManager.java:1637)
>         at
> org.apache.ignite.internal.processors.cache.persistence.
> GridCacheDatabaseSharedManager.restoreState(GridCacheDatabaseSharedManager
> .java:1072)
>         at
> org.apache.ignite.internal.processors.cache.persistence.
> GridCacheDatabaseSharedManager.beforeExchange(
> GridCacheDatabaseSharedManager.java:863)
>         at
> org.apache.ignite.internal.processors.cache.distributed.dht.preloader.
> GridDhtPartitionsExchangeFuture.distributedExchange(
> GridDhtPartitionsExchangeFuture.java:1019)
>         at
> org.apache.ignite.internal.processors.cache.distributed.dht.preloader.
> GridDhtPartitionsExchangeFuture.init(GridDhtPartitionsExchangeFutur
> e.java:651)
>         at
> org.apache.ignite.internal.processors.cache.GridCachePartitionExchangeMana
> ger$ExchangeWorker.body(GridCachePartitionExchangeManager.java:2279)
>         at
> org.apache.ignite.internal.util.worker.GridWorker.run(GridWorker.java:110)
>         at java.lang.Thread.run(Thread.java:745)
> [17:40:54,818][INFO][exchange-worker-#122][GridDhtPartitionsExchangeFutur
> e]
> Finish exchange future [startVer=AffinityTopologyVersion [topVer=12,
> minorTopVer=1], resVer=null, err=java.lang.IndexOutOfBoundsException:
> index
> 890]
> [17:40:54,830][SEVERE][exchange-worker-#122][
> GridCachePartitionExchangeManager]
> Failed to wait for completion of partition map exchange (preloading will
> not
> start): GridDhtPartitionsExchangeFuture [firstDiscoEvt=
> DiscoveryCustomEvent
> [customMsg=null, affTopVer=AffinityTopologyVersion [topVer=12,
> minorTopVer=1], super=DiscoveryEvent [evtNode=TcpDiscoveryNode
> [id=c6e1394e-bf7a-4fe4-a1bf-f64193bd44f4, addrs=[10.116.24.222,
> 10.244.5.0,
> 127.0.0.1, 172.17.0.1, 192.168.193.192], sockAddrs=[/10.244.5.0:47500,
> /172.17.0.1:47500, /192.168.193.192:47500, /127.0.0.1:47500,
> /10.116.24.222:47500], discPort=47500, order=3, intOrder=3,
> lastExchangeTime=1525077608394, loc=false, ver=2.3.0#20171220-sha1:
> 8431829c,
> isClient=false], topVer=12, nodeId8=e8f4c909, msg=null,
> type=DISCOVERY_CUSTOM_EVT, tstamp=1525077647980]], crd=TcpDiscoveryNode
> [id=8e65440a-df65-4770-9a7b-26672bd574a3, addrs=[10.116.25.32, 10.244.6.0,
> 127.0.0.1, 172.17.0.1, 192.168.82.128], sockAddrs=[/10.244.6.0:47500,
> /10.116.25.32:47500, /172.17.0.1:47500, /192.168.82.128:47500,
> /127.0.0.1:47500], discPort=47500, order=1, intOrder=1,
> lastExchangeTime=1525077608394, loc=false, ver=2.3.0#20171220-sha1:
> 8431829c,
> isClient=false], exchId=GridDhtPartitionExchangeId
> [topVer=AffinityTopologyVersion [topVer=12, minorTopVer=1],
> discoEvt=DiscoveryCustomEvent [customMsg=null,
> affTopVer=AffinityTopologyVersion [topVer=12, minorTopVer=1],
> super=DiscoveryEvent [evtNode=TcpDiscoveryNode
> [id=c6e1394e-bf7a-4fe4-a1bf-f64193bd44f4, addrs=[10.116.24.222,
> 10.244.5.0,
> 127.0.0.1, 172.17.0.1, 192.168.193.192], sockAddrs=[/10.244.5.0:47500,
> /172.17.0.1:47500, /192.168.193.192:47500, /127.0.0.1:47500,
> /10.116.24.222:47500], discPort=47500, order=3, intOrder=3,
> lastExchangeTime=1525077608394, loc=false, ver=2.3.0#20171220-sha1:
> 8431829c,
> isClient=false], topVer=12, nodeId8=e8f4c909, msg=null,
> type=DISCOVERY_CUSTOM_EVT, tstamp=1525077647980]], nodeId=c6e1394e,
> evt=DISCOVERY_CUSTOM_EVT], added=true, initFut=GridFutureAdapter
> [ignoreInterrupts=false, state=DONE, res=false, hash=989374705],
> init=false,
> lastVer=null, partReleaseFut=PartitionReleaseFuture
> [topVer=AffinityTopologyVersion [topVer=12, minorTopVer=1],
> futures=[ExplicitLockReleaseFuture [topVer=AffinityTopologyVersion
> [topVer=12, minorTopVer=1], futures=[]], TxReleaseFuture
> [topVer=AffinityTopologyVersion [topVer=12, minorTopVer=1], futures=[]],
> AtomicUpdateReleaseFuture [topVer=AffinityTopologyVersion [topVer=12,
> minorTopVer=1], futures=[]], DataStreamerReleaseFuture
> [topVer=AffinityTopologyVersion [topVer=12, minorTopVer=1], futures=[]]]],
> exchActions=null, affChangeMsg=null, initTs=1525077647990,
> centralizedAff=false, changeGlobalStateE=null, done=true, state=SRV,
> evtLatch=0, remaining=[8e65440a-df65-4770-9a7b-26672bd574a3,
> 18184b4a-0fe0-4fff-a917-a3b03f16a509, c6e1394e-bf7a-4fe4-a1bf-
> f64193bd44f4,
> 491c9af5-e855-42d8-b617-e72bf3099a46, 4aae4b1e-6ef4-43ac-b156-
> f5445adb40c6,
> 91d8036d-c74f-48d7-b389-82ebba96adf2, 95e77e2f-ba25-4c12-b9b0-
> d1b21386eb36,
> c12d30c3-bf9e-4c58-9468-8ef878ec2679, 1edbfd89-a03f-4fea-93b9-
> d058eb93f66b],
> super=GridFutureAdapter [ignoreInterrupts=false, state=DONE,
> res=java.lang.IndexOutOfBoundsException: index 890, hash=328088520]]
> class org.apache.ignite.IgniteCheckedException: index 890
>         at
> org.apache.ignite.internal.util.IgniteUtils.cast(IgniteUtils.java:7252)
>         at
> org.apache.ignite.internal.util.future.GridFutureAdapter.
> resolve(GridFutureAdapter.java:259)
>         at
> org.apache.ignite.internal.util.future.GridFutureAdapter.
> get0(GridFutureAdapter.java:207)
>         at
> org.apache.ignite.internal.util.future.GridFutureAdapter.
> get(GridFutureAdapter.java:159)
>         at
> org.apache.ignite.internal.processors.cache.GridCachePartitionExchangeMana
> ger$ExchangeWorker.body(GridCachePartitionExchangeManager.java:2289)
>         at
> org.apache.ignite.internal.util.worker.GridWorker.run(GridWorker.java:110)
>         at java.lang.Thread.run(Thread.java:745)
> Caused by: java.lang.IndexOutOfBoundsException: index 890
>         at
> java.util.concurrent.atomic.AtomicReferenceArray.checkedByteOffset(
> AtomicReferenceArray.java:78)
>         at
> java.util.concurrent.atomic.AtomicReferenceArray.get(
> AtomicReferenceArray.java:125)
>         at
> org.apache.ignite.internal.processors.cache.distributed.dht.
> GridDhtPartitionTopologyImpl.forceCreatePartition(
> GridDhtPartitionTopologyImpl.java:767)
>         at
> org.apache.ignite.internal.processors.cache.persistence.
> GridCacheDatabaseSharedManager.applyUpdate(GridCacheDatabaseSharedManager
> .java:1777)
>         at
> org.apache.ignite.internal.processors.cache.persistence.
> GridCacheDatabaseSharedManager.applyLastUpdates(
> GridCacheDatabaseSharedManager.java:1637)
>         at
> org.apache.ignite.internal.processors.cache.persistence.
> GridCacheDatabaseSharedManager.restoreState(GridCacheDatabaseSharedManager
> .java:1072)
>         at
> org.apache.ignite.internal.processors.cache.persistence.
> GridCacheDatabaseSharedManager.beforeExchange(
> GridCacheDatabaseSharedManager.java:863)
>         at
> org.apache.ignite.internal.processors.cache.distributed.dht.preloader.
> GridDhtPartitionsExchangeFuture.distributedExchange(
> GridDhtPartitionsExchangeFuture.java:1019)
>         at
> org.apache.ignite.internal.processors.cache.distributed.dht.preloader.
> GridDhtPartitionsExchangeFuture.init(GridDhtPartitionsExchangeFutur
> e.java:651)
>         at
> org.apache.ignite.internal.processors.cache.GridCachePartitionExchangeMana
> ger$ExchangeWorker.body(GridCachePartitionExchangeManager.java:2279)
>         ... 2 more
>
> And, I cannot do any operations.
>
> This symptom started to show when I cancelled (Ctrl+C) a service
> deployment.
> At that time, other job was writing to a cache. I just changed the sticky
> parameter of a service deployment (from false to true), and the deployment
> was too slow, so I cancelled it. And then I restarted the cluster, and the
> problem began.
>
> Is there any solution or workaround for this error like skipping the
> checkpoint restoring process, because it's ok for me to lose some recent
> cache updates.
>
> Ignite version is 2.3.0 and config is as follows.
>
> <?xml version="1.0" encoding="UTF-8"?>
>
>
>
> <beans xmlns="http://www.springframework.org/schema/beans"
>        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
>        xsi:schemaLocation="
>        http://www.springframework.org/schema/beans
>        http://www.springframework.org/schema/beans/spring-beans.xsd">
>     <bean id="grid.cfg"
> class="org.apache.ignite.configuration.IgniteConfiguration">
>
>
>         <property name="serviceThreadPoolSize" value="80"/>
>
>         <property name="failureDetectionTimeout" value="3600000"/>
>
>
>         <property name="cacheConfiguration">
>             <list>
>
>                 <bean
> class="org.apache.ignite.configuration.CacheConfiguration">
>                     <property name="name"
> value="valid_dup_ratio_cache_name"/>
>                     <property name="atomicityMode" value="ATOMIC"/>
>                     <property name="cacheMode" value="REPLICATED"/>
>                     <property name="indexedTypes">
>                         <list>
>                             <value>java.lang.String</value>
>                             <value>java.util.LinkedList</value>
>                         </list>
>                     </property>
>                 </bean>
>
>
>                 <bean
> class="org.apache.ignite.configuration.CacheConfiguration">
>                     <property name="name"
> value="dup_ratio_hbase_read_through"/>
>                     <property name="atomicityMode" value="ATOMIC"/>
>                     <property name="cacheMode" value="PARTITIONED"/>
>                     <property name="onheapCacheEnabled" value="true"/>
>                     <property name="evictionPolicy">
>                         <bean
> class="org.apache.ignite.cache.eviction.lru.LruEvictionPolicy">
>                             <property name="batchSize" value="5"/>
>
>                         </bean>
>                     </property>
>
>                     <property name="expiryPolicyFactory">
>                         <bean id="expiryPolicy"
> class="javax.cache.expiry.CreatedExpiryPolicy" factory-method="factoryOf">
>                             <constructor-arg>
>                                 <bean class="javax.cache.expiry.Duration">
>                                     <constructor-arg value="HOURS"/>
>                                     <constructor-arg value="24"/>
>                                 </bean>
>                             </constructor-arg>
>                         </bean>
>                     </property>
>
>                     <property name="cacheStoreFactory">
>                         <bean
> class="javax.cache.configuration.FactoryBuilder"
> factory-method="factoryOf">
>                             <constructor-arg
> value="com.naver.kweb.serp.title.ignite.read_through.
> HBaseDupRatioAdapter"/>
>                         </bean>
>                     </property>
>                     <property name="readThrough" value="true"/>
>                     <property name="writeThrough" value="false"/>
>                 </bean>
>             </list>
>         </property>
>
>
>         <property name="discoverySpi">
>             <bean
> class="org.apache.ignite.spi.discovery.tcp.TcpDiscoverySpi">
>                 <property name="clientReconnectDisabled" value="false"/>
>                 <property name="networkTimeout" value="120000"/>
>                 <property name="ipFinder">
>
>                     <bean
> class="org.apache.ignite.spi.discovery.tcp.ipfinder.vm.
> TcpDiscoveryVmIpFinder">
>                         <property name="addresses">
>                             <list>
>
>
> <value>csb7x0876.nfra.io:47500..47509</value>
>
> <value>csb7x0877.nfra.io:47500..47509</value>
>
> <value>csb7x0878.nfra.io:47500..47509</value>
>
> <value>csb7x0879.nfra.io:47500..47509</value>
>
> <value>csb7x0880.nfra.io:47500..47509</value>
>
> <value>csb7x0881.nfra.io:47500..47509</value>
>
> <value>csb7x0882.nfra.io:47500..47509</value>
>
> <value>csb7x0883.nfra.io:47500..47509</value>
>
> <value>csb7x0884.nfra.io:47500..47509</value>
>
> <value>csb7x0885.nfra.io:47500..47509</value>
>                             </list>
>                         </property>
>                     </bean>
>                 </property>
>             </bean>
>         </property>
>
>
>         <property name="dataStorageConfiguration">
>             <bean
> class="org.apache.ignite.configuration.DataStorageConfiguration">
>
>
>                 <property name="writeThrottlingEnabled" value="true"/>
>
>                 <property name="defaultDataRegionConfiguration">
>                     <bean
> class="org.apache.ignite.configuration.DataRegionConfiguration">
>                         <property name="persistenceEnabled" value="true"/>
>                         <property name="name" value="Default_Region"/>
>                         <property name="maxSize" value="#{75L * 1024 * 1024
> * 1024}"/>
>                         <property name="checkpointPageBufferSize"
> value="#{1024L * 1024 * 1024}"/>
>                         <property name="metricsEnabled" value="true"/>
>                     </bean>
>                 </property>
>                 <property name="storagePath"
> value="/naver/ignite_storage/20180330/storage"/>
>                 <property name="walPath"
> value="/naver/ignite_storage/20180330/wal"/>
>                 <property name="walArchivePath"
> value="/naver/ignite_storage/20180330/walArchive"/>
>                 <property name="metricsEnabled" value="true"/>
>             </bean>
>         </property>
>
>
>         <property name="binaryConfiguration">
>             <bean
> class="org.apache.ignite.configuration.BinaryConfiguration">
>                 <property name="typeConfigurations">
>                     <list>
>                         <bean
> class="org.apache.ignite.binary.BinaryTypeConfiguration">
>                             <property name="typeName"
> value="com.naver.kweb.serp.title.ignite.service.TitleMakerServiceImpl"/>
>                         </bean>
>                     </list>
>                 </property>
>             </bean>
>         </property>
>     </bean>
> </beans>
>
> Thanks.
>
>
>
> --
> Sent from: http://apache-ignite-users.70518.x6.nabble.com/
>



-- 

Regards

Pavel Vinokurov