You are viewing a plain text version of this content. The canonical link for it is here.
Posted to user@ignite.apache.org by Ray <ra...@cisco.com> on 2018/07/24 08:10:00 UTC

Ignite data can't be recovered after node fail

Following the node failure described in this thread
http://apache-ignite-users.70518.x6.nabble.com/Ignite-node-failed-for-no-obvious-reason-td22866.html,
I tried to reboot this node and recover the data to make the Ignite cluster
available again.

First, I tried to reboot node2 directly, but it failed.
The node log is as follows.

[2018-07-24T02:57:38,956][INFO ][main][IgniteKernal] 

>>>    __________  ________________  
>>>   /  _/ ___/ |/ /  _/_  __/ __/  
>>>  _/ // (7 7    // /  / / / _/    
>>> /___/\___/_/|_/___/ /_/ /___/   
>>> 
>>> ver. 2.6.0#20180710-sha1:669feacc
>>> 2018 Copyright(C) Apache Software Foundation
>>> 
>>> Ignite documentation: http://ignite.apache.org

[2018-07-24T02:57:38,976][INFO ][main][IgniteKernal] Config URL:
file:/opt/apache-ignite-fabric-2.6.0-bin/config/persistent-config.xml
[2018-07-24T02:57:38,984][INFO ][main][IgniteKernal] IgniteConfiguration
[igniteInstanceName=null, pubPoolSize=56, svcPoolSize=56,
callbackPoolSize=56, stripedPoolSize=56, sysPoolSize=56, mgmtPoolSize=4,
igfsPoolSize=56, dataStreamerPoolSize=56, utilityCachePoolSize=56,
utilityCacheKeepAliveTime=60000, p2pPoolSize=2, qryPoolSize=56,
igniteHome=/opt/apache-ignite-fabric-2.6.0-bin,
igniteWorkDir=/opt/apache-ignite-fabric-2.6.0-bin/work,
mbeanSrv=com.sun.jmx.mbeanserver.JmxMBeanServer@6f94fa3e,
nodeId=7e3c0623-a6a5-4a7b-966e-6882b86ff922,
marsh=org.apache.ignite.internal.binary.BinaryMarshaller@1890516e,
marshLocJobs=false, daemon=false, p2pEnabled=true, netTimeout=5000,
sndRetryDelay=1000, sndRetryCnt=3, metricsHistSize=10000,
metricsUpdateFreq=2000, metricsExpTime=9223372036854775807,
discoSpi=TcpDiscoverySpi [addrRslvr=null, sockTimeout=0, ackTimeout=0,
marsh=null, reconCnt=10, reconDelay=2000, maxAckTimeout=600000,
forceSrvMode=false, clientReconnectDisabled=false, internalLsnr=null],
segPlc=RESTART_JVM, segResolveAttempts=2, waitForSegOnStart=true,
allResolversPassReq=true, segChkFreq=10000, commSpi=TcpCommunicationSpi
[connectGate=null, connPlc=null, enableForcibleNodeKill=false,
enableTroubleshootingLog=false,
srvLsnr=org.apache.ignite.spi.communication.tcp.TcpCommunicationSpi$2@42e25b0b,
locAddr=null, locHost=null, locPort=47100, locPortRange=100, shmemPort=-1,
directBuf=true, directSndBuf=false, idleConnTimeout=600000,
connTimeout=5000, maxConnTimeout=600000, reconCnt=10, sockSndBuf=32768,
sockRcvBuf=32768, msgQueueLimit=0, slowClientQueueLimit=0, nioSrvr=null,
shmemSrv=null, usePairedConnections=false, connectionsPerNode=1,
tcpNoDelay=true, filterReachableAddresses=false, ackSndThreshold=32,
unackedMsgsBufSize=0, sockWriteTimeout=2000, lsnr=null, boundTcpPort=-1,
boundTcpShmemPort=-1, selectorsCnt=28, selectorSpins=0, addrRslvr=null,
ctxInitLatch=java.util.concurrent.CountDownLatch@39b43d60[Count = 1],
stopping=false,
metricsLsnr=org.apache.ignite.spi.communication.tcp.TcpCommunicationMetricsListener@44be0077],
evtSpi=org.apache.ignite.spi.eventstorage.NoopEventStorageSpi@2205a05d,
colSpi=NoopCollisionSpi [], deploySpi=LocalDeploymentSpi [lsnr=null],
indexingSpi=org.apache.ignite.spi.indexing.noop.NoopIndexingSpi@5f20155b,
addrRslvr=null, clientMode=false, rebalanceThreadPoolSize=1,
txCfg=org.apache.ignite.configuration.TransactionConfiguration@72ade7e3,
cacheSanityCheckEnabled=true, discoStartupDelay=60000, deployMode=SHARED,
p2pMissedCacheSize=100, locHost=null, timeSrvPortBase=31100,
timeSrvPortRange=100, failureDetectionTimeout=60000,
clientFailureDetectionTimeout=30000, metricsLogFreq=60000, hadoopCfg=null,
connectorCfg=org.apache.ignite.configuration.ConnectorConfiguration@239105a8,
odbcCfg=null, warmupClos=null, atomicCfg=AtomicConfiguration
[seqReserveSize=1000, cacheMode=PARTITIONED, backups=1, aff=null,
grpName=null], classLdr=null, sslCtxFactory=null, platformCfg=null,
binaryCfg=null, memCfg=null, pstCfg=null, dsCfg=DataStorageConfiguration
[sysRegionInitSize=41943040, sysCacheMaxSize=104857600, pageSize=0,
concLvl=0, dfltDataRegConf=DataRegionConfiguration [name=default_Region,
maxSize=493921239040, initSize=107374182400, swapPath=null,
pageEvictionMode=DISABLED, evictionThreshold=0.9, emptyPagesPoolSize=100,
metricsEnabled=false, metricsSubIntervalCount=5,
metricsRateTimeInterval=60000, persistenceEnabled=true,
checkpointPageBufSize=8589934592], storagePath=/data/ignite/persistence,
checkpointFreq=600000, lockWaitTime=10000, checkpointThreads=4,
checkpointWriteOrder=SEQUENTIAL, walHistSize=20, walSegments=10,
walSegmentSize=67108864, walPath=/wal, walArchivePath=/wal/archive,
metricsEnabled=false, walMode=BACKGROUND, walTlbSize=131072, walBuffSize=0,
walFlushFreq=5000, walFsyncDelay=1000, walRecordIterBuffSize=67108864,
alwaysWriteFullPages=false,
fileIOFactory=org.apache.ignite.internal.processors.cache.persistence.file.AsyncFileIOFactory@609bcfb6,
metricsSubIntervalCnt=5, metricsRateTimeInterval=60000,
walAutoArchiveAfterInactivity=-1, writeThrottlingEnabled=false,
walCompactionEnabled=false], activeOnStart=true, autoActivation=true,
longQryWarnTimeout=3000, sqlConnCfg=null,
cliConnCfg=ClientConnectorConfiguration [host=null, port=10800,
portRange=100, sockSndBufSize=0, sockRcvBufSize=0, tcpNoDelay=true,
maxOpenCursorsPerConn=128, threadPoolSize=56, idleTimeout=0,
jdbcEnabled=true, odbcEnabled=true, thinCliEnabled=true, sslEnabled=false,
useIgniteSslCtxFactory=true, sslClientAuth=false, sslCtxFactory=null],
authEnabled=false, failureHnd=null, commFailureRslvr=null]
[2018-07-24T02:57:38,984][INFO ][main][IgniteKernal] Daemon mode: off
[2018-07-24T02:57:38,985][INFO ][main][IgniteKernal] OS: Linux
2.6.32-696.16.1.el6.x86_64 amd64
[2018-07-24T02:57:38,985][INFO ][main][IgniteKernal] OS user: root
[2018-07-24T02:57:38,985][INFO ][main][IgniteKernal] PID: 49525
[2018-07-24T02:57:38,985][INFO ][main][IgniteKernal] Language runtime: Java
Platform API Specification ver. 1.8
[2018-07-24T02:57:38,985][INFO ][main][IgniteKernal] VM information: OpenJDK
Runtime Environment 1.8.0_161-b14 Oracle Corporation OpenJDK 64-Bit Server
VM 25.161-b14
[2018-07-24T02:57:38,986][INFO ][main][IgniteKernal] VM total memory: 31.0GB
[2018-07-24T02:57:38,986][INFO ][main][IgniteKernal] Remote Management
[restart: on, REST: on, JMX (remote: on, port: 49112, auth: off, ssl: off)]
[2018-07-24T02:57:38,987][INFO ][main][IgniteKernal] Logger: Log4J2Logger
[quiet=true, config=config/ignite-log4j2.xml]
[2018-07-24T02:57:38,987][INFO ][main][IgniteKernal]
IGNITE_HOME=/opt/apache-ignite-fabric-2.6.0-bin
[2018-07-24T02:57:38,987][INFO ][main][IgniteKernal] VM arguments: [-Xms1g,
-Xmx1g, -XX:+AggressiveOpts, -XX:MaxMetaspaceSize=256m, -DIGNITE_QUIET=true,
-DIGNITE_SUCCESS_FILE=/opt/apache-ignite-fabric-2.6.0-bin/work/ignite_success_2fd90195-cb52-4762-9732-3f1366e2e9cb,
-Dcom.sun.management.jmxremote, -Dcom.sun.management.jmxremote.port=49112,
-Dcom.sun.management.jmxremote.authenticate=false,
-Dcom.sun.management.jmxremote.ssl=false,
-DIGNITE_HOME=/opt/apache-ignite-fabric-2.6.0-bin,
-DIGNITE_PROG_NAME=./ignite.sh, -Xmx32000m, -Xms32000m, -XX:+UseG1GC,
-XX:+ScavengeBeforeFullGC, -XX:+DisableExplicitGC, -XX:+AlwaysPreTouch,
-XX:+PrintGCDetails, -XX:+PrintGCTimeStamps, -XX:+PrintGCDateStamps,
-XX:+PrintAdaptiveSizePolicy,
-Xloggc:/spare/ignite/log/ignitegc-2018_07_24-02_57.log]
[2018-07-24T02:57:38,987][INFO ][main][IgniteKernal] System cache's
DataRegion size is configured to 40 MB. Use
DataStorageConfiguration.systemCacheMemorySize property to change the
setting.
[2018-07-24T02:57:38,993][INFO ][main][IgniteKernal] Configured caches [in
'sysMemPlc' dataRegion: ['ignite-sys-cache']]
[2018-07-24T02:57:38,993][WARN ][main][IgniteKernal] Peer class loading is
enabled (disable it in production for performance and deployment consistency
reasons)
[2018-07-24T02:57:38,996][INFO ][main][IgniteKernal] 3-rd party licenses can
be found at: /opt/apache-ignite-fabric-2.6.0-bin/libs/licenses
[2018-07-24T02:57:39,070][INFO ][main][IgnitePluginProcessor] Configured
plugins:
[2018-07-24T02:57:39,071][INFO ][main][IgnitePluginProcessor]   ^-- None
[2018-07-24T02:57:39,071][INFO ][main][IgnitePluginProcessor] 
[2018-07-24T02:57:39,072][INFO ][main][FailureProcessor] Configured failure
handler: [hnd=StopNodeOrHaltFailureHandler [tryStop=false, timeout=0]]
[2018-07-24T02:57:39,109][INFO ][main][TcpCommunicationSpi] Successfully
bound communication NIO server to TCP port [port=47100,
locHost=0.0.0.0/0.0.0.0, selectorsCnt=28, selectorSpins=0, pairedConn=false]
[2018-07-24T02:57:39,110][WARN ][main][TcpCommunicationSpi] Message queue
limit is set to 0 which may lead to potential OOMEs when running cache
operations in FULL_ASYNC or PRIMARY_SYNC modes due to message queues growth
on sender and receiver sides.
[2018-07-24T02:57:39,126][WARN ][main][NoopCheckpointSpi] Checkpoints are
disabled (to enable configure any GridCheckpointSpi implementation)
[2018-07-24T02:57:39,145][WARN ][main][GridCollisionManager] Collision
resolution is disabled (all jobs will be activated upon arrival).
[2018-07-24T02:57:39,146][INFO ][main][IgniteKernal] Security status
[authentication=off, tls/ssl=off]
[2018-07-24T02:57:39,171][INFO ][main][TcpDiscoverySpi] Successfully bound
to TCP port [port=49500, localHost=0.0.0.0/0.0.0.0,
locNodeId=7e3c0623-a6a5-4a7b-966e-6882b86ff922]
[2018-07-24T02:57:39,178][INFO ][main][PdsFoldersResolver] Successfully
locked persistence storage folder
[/data/ignite/persistence/node00-33503bf4-323b-4965-8bb1-31597d3bedf4]
[2018-07-24T02:57:39,178][INFO ][main][PdsFoldersResolver] Consistent ID
used for local node is [33503bf4-323b-4965-8bb1-31597d3bedf4] according to
persistence data storage folders
[2018-07-24T02:57:39,178][INFO ][main][CacheObjectBinaryProcessorImpl]
Resolved directory for serialized binary metadata:
/opt/apache-ignite-fabric-2.6.0-bin/work/binary_meta/node00-33503bf4-323b-4965-8bb1-31597d3bedf4
[2018-07-24T02:57:39,361][INFO ][main][FilePageStoreManager] Resolved page
store work directory:
/data/ignite/persistence/node00-33503bf4-323b-4965-8bb1-31597d3bedf4
[2018-07-24T02:57:39,361][INFO ][main][FileWriteAheadLogManager] Resolved
write ahead log work directory:
/wal/node00-33503bf4-323b-4965-8bb1-31597d3bedf4
[2018-07-24T02:57:39,361][INFO ][main][FileWriteAheadLogManager] Resolved
write ahead log archive directory:
/wal/archive/node00-33503bf4-323b-4965-8bb1-31597d3bedf4
[2018-07-24T02:57:39,422][INFO ][main][FileWriteAheadLogManager] Started
write-ahead log manager [mode=BACKGROUND]
[2018-07-24T02:57:39,454][INFO ][main][GridCacheDatabaseSharedManager] Read
checkpoint status
[startMarker=/data/ignite/persistence/node00-33503bf4-323b-4965-8bb1-31597d3bedf4/cp/1532374156904-fb69dc16-1947-411c-bc5e-ee6540ba8e53-START.bin,
endMarker=/data/ignite/persistence/node00-33503bf4-323b-4965-8bb1-31597d3bedf4/cp/1532374156904-fb69dc16-1947-411c-bc5e-ee6540ba8e53-END.bin]
[2018-07-24T02:57:39,465][INFO ][main][PageMemoryImpl] Started page memory
[memoryAllocated=100.0 MiB, pages=24808, tableSize=1.9 MiB,
checkpointBuffer=100.0 MiB]
[2018-07-24T02:57:39,465][INFO ][main][GridCacheDatabaseSharedManager]
Checking memory state [lastValidPos=FileWALPointer [idx=21672,
fileOff=13729571, len=27723], lastMarked=FileWALPointer [idx=21672,
fileOff=13729571, len=27723],
lastCheckpointId=fb69dc16-1947-411c-bc5e-ee6540ba8e53]
[2018-07-24T02:57:39,544][WARN ][main][FileWriteAheadLogManager] WAL segment
tail is reached. [ Expected next state: {Index=21672,Offset=13757294},
Actual state : {Index=3690196541643296009,Offset=603992117} ]
[2018-07-24T02:57:39,544][INFO ][main][GridCacheDatabaseSharedManager] Found
last checkpoint marker [cpId=fb69dc16-1947-411c-bc5e-ee6540ba8e53,
pos=FileWALPointer [idx=21672, fileOff=13729571, len=27723]]
[2018-07-24T02:57:39,569][INFO ][main][GridCacheDatabaseSharedManager]
Applying lost cache updates since last checkpoint record
[lastMarked=FileWALPointer [idx=21672, fileOff=13729571, len=27723],
lastCheckpointId=fb69dc16-1947-411c-bc5e-ee6540ba8e53]
[2018-07-24T02:57:39,628][WARN ][main][FileWriteAheadLogManager] WAL segment
tail is reached. [ Expected next state: {Index=21672,Offset=13757294},
Actual state : {Index=3690196541643296009,Offset=603992117} ]
[2018-07-24T02:57:39,629][INFO ][main][GridCacheDatabaseSharedManager]
Finished applying WAL changes [updatesApplied=0, time=61ms]
[2018-07-24T02:57:39,666][INFO ][main][GridClusterStateProcessor] Restoring
history for BaselineTopology[id=0]
[2018-07-24T02:57:39,752][INFO ][main][ClientListenerProcessor] Client
connector processor has started on TCP port 10800
[2018-07-24T02:57:39,790][INFO ][main][GridTcpRestProtocol] Command protocol
successfully started [name=TCP binary, host=0.0.0.0/0.0.0.0, port=11211]
[2018-07-24T02:57:39,939][INFO ][main][GridJettyRestProtocol] Command
protocol successfully started [name=Jetty REST, host=/0.0.0.0, port=8080]
[2018-07-24T02:57:39,977][INFO ][main][IgniteKernal] Non-loopback local IPs:
10.252.10.4
[2018-07-24T02:57:39,977][INFO ][main][IgniteKernal] Enabled local MACs:
7079B364407D
[2018-07-24T02:57:40,098][INFO ][tcp-disco-srvr-#2][TcpDiscoverySpi] TCP
discovery accepted incoming connection [rmtAddr=/10.252.10.20,
rmtPort=51032]
[2018-07-24T02:57:40,107][INFO ][tcp-disco-srvr-#2][TcpDiscoverySpi] TCP
discovery spawning a new thread for connection [rmtAddr=/10.252.10.20,
rmtPort=51032]
[2018-07-24T02:57:40,107][INFO ][tcp-disco-sock-reader-#4][TcpDiscoverySpi]
Started serving remote node connection [rmtAddr=/10.252.10.20:51032,
rmtPort=51032]
[2018-07-24T02:57:40,329][ERROR][tcp-disco-msg-worker-#3][TcpDiscoverySpi]
TcpDiscoverSpi's message worker thread failed abnormally. Stopping the node
in order to prevent cluster wide instability.
org.apache.ignite.IgniteException: Node with BaselineTopology cannot join
mixed cluster running in compatibility mode
	at
org.apache.ignite.internal.processors.cluster.GridClusterStateProcessor.onGridDataReceived(GridClusterStateProcessor.java:714)
~[ignite-core-2.6.0.jar:2.6.0]
	at
org.apache.ignite.internal.managers.discovery.GridDiscoveryManager$5.onExchange(GridDiscoveryManager.java:883)
~[ignite-core-2.6.0.jar:2.6.0]
	at
org.apache.ignite.spi.discovery.tcp.TcpDiscoverySpi.onExchange(TcpDiscoverySpi.java:1939)
~[ignite-core-2.6.0.jar:2.6.0]
	at
org.apache.ignite.spi.discovery.tcp.ServerImpl$RingMessageWorker.processNodeAddedMessage(ServerImpl.java:4354)
~[ignite-core-2.6.0.jar:2.6.0]
	at
org.apache.ignite.spi.discovery.tcp.ServerImpl$RingMessageWorker.processMessage(ServerImpl.java:2744)
~[ignite-core-2.6.0.jar:2.6.0]
	at
org.apache.ignite.spi.discovery.tcp.ServerImpl$RingMessageWorker.processMessage(ServerImpl.java:2536)
~[ignite-core-2.6.0.jar:2.6.0]
	at
org.apache.ignite.spi.discovery.tcp.ServerImpl$MessageWorkerAdapter.body(ServerImpl.java:6775)
~[ignite-core-2.6.0.jar:2.6.0]
	at
org.apache.ignite.spi.discovery.tcp.ServerImpl$RingMessageWorker.body(ServerImpl.java:2621)
[ignite-core-2.6.0.jar:2.6.0]
	at org.apache.ignite.spi.IgniteSpiThread.run(IgniteSpiThread.java:62)
[ignite-core-2.6.0.jar:2.6.0]
[2018-07-24T02:57:40,333][ERROR][tcp-disco-msg-worker-#3][] Critical system
error detected. Will be handled accordingly to configured handler [hnd=class
o.a.i.failure.StopNodeOrHaltFailureHandler, failureCtx=FailureContext
[type=SYSTEM_WORKER_TERMINATION, err=class o.a.i.IgniteException: Node with
BaselineTopology cannot join mixed cluster running in compatibility mode]]
org.apache.ignite.IgniteException: Node with BaselineTopology cannot join
mixed cluster running in compatibility mode
	at
org.apache.ignite.internal.processors.cluster.GridClusterStateProcessor.onGridDataReceived(GridClusterStateProcessor.java:714)
~[ignite-core-2.6.0.jar:2.6.0]
	at
org.apache.ignite.internal.managers.discovery.GridDiscoveryManager$5.onExchange(GridDiscoveryManager.java:883)
~[ignite-core-2.6.0.jar:2.6.0]
	at
org.apache.ignite.spi.discovery.tcp.TcpDiscoverySpi.onExchange(TcpDiscoverySpi.java:1939)
~[ignite-core-2.6.0.jar:2.6.0]
	at
org.apache.ignite.spi.discovery.tcp.ServerImpl$RingMessageWorker.processNodeAddedMessage(ServerImpl.java:4354)
~[ignite-core-2.6.0.jar:2.6.0]
	at
org.apache.ignite.spi.discovery.tcp.ServerImpl$RingMessageWorker.processMessage(ServerImpl.java:2744)
~[ignite-core-2.6.0.jar:2.6.0]
	at
org.apache.ignite.spi.discovery.tcp.ServerImpl$RingMessageWorker.processMessage(ServerImpl.java:2536)
~[ignite-core-2.6.0.jar:2.6.0]
	at
org.apache.ignite.spi.discovery.tcp.ServerImpl$MessageWorkerAdapter.body(ServerImpl.java:6775)
~[ignite-core-2.6.0.jar:2.6.0]
	at
org.apache.ignite.spi.discovery.tcp.ServerImpl$RingMessageWorker.body(ServerImpl.java:2621)
[ignite-core-2.6.0.jar:2.6.0]
	at org.apache.ignite.spi.IgniteSpiThread.run(IgniteSpiThread.java:62)
[ignite-core-2.6.0.jar:2.6.0]
[2018-07-24T02:57:40,334][ERROR][main][IgniteKernal] Failed to start
manager: GridManagerAdapter [enabled=true,
name=o.a.i.i.managers.discovery.GridDiscoveryManager]
org.apache.ignite.IgniteCheckedException: Failed to start SPI:
TcpDiscoverySpi [addrRslvr=null, sockTimeout=5000, ackTimeout=5000,
marsh=JdkMarshaller
[clsFilter=org.apache.ignite.marshaller.MarshallerUtils$1@64a9d48c],
reconCnt=10, reconDelay=2000, maxAckTimeout=600000, forceSrvMode=false,
clientReconnectDisabled=false, internalLsnr=null]
	at
org.apache.ignite.internal.managers.GridManagerAdapter.startSpi(GridManagerAdapter.java:300)
~[ignite-core-2.6.0.jar:2.6.0]
	at
org.apache.ignite.internal.managers.discovery.GridDiscoveryManager.start(GridDiscoveryManager.java:915)
~[ignite-core-2.6.0.jar:2.6.0]
	at
org.apache.ignite.internal.IgniteKernal.startManager(IgniteKernal.java:1721)
[ignite-core-2.6.0.jar:2.6.0]
	at org.apache.ignite.internal.IgniteKernal.start(IgniteKernal.java:1028)
[ignite-core-2.6.0.jar:2.6.0]
	at
org.apache.ignite.internal.IgnitionEx$IgniteNamedInstance.start0(IgnitionEx.java:2014)
[ignite-core-2.6.0.jar:2.6.0]
	at
org.apache.ignite.internal.IgnitionEx$IgniteNamedInstance.start(IgnitionEx.java:1723)
[ignite-core-2.6.0.jar:2.6.0]
	at org.apache.ignite.internal.IgnitionEx.start0(IgnitionEx.java:1151)
[ignite-core-2.6.0.jar:2.6.0]
	at
org.apache.ignite.internal.IgnitionEx.startConfigurations(IgnitionEx.java:1069)
[ignite-core-2.6.0.jar:2.6.0]
	at org.apache.ignite.internal.IgnitionEx.start(IgnitionEx.java:955)
[ignite-core-2.6.0.jar:2.6.0]
	at org.apache.ignite.internal.IgnitionEx.start(IgnitionEx.java:854)
[ignite-core-2.6.0.jar:2.6.0]
	at org.apache.ignite.internal.IgnitionEx.start(IgnitionEx.java:724)
[ignite-core-2.6.0.jar:2.6.0]
	at org.apache.ignite.internal.IgnitionEx.start(IgnitionEx.java:693)
[ignite-core-2.6.0.jar:2.6.0]
	at org.apache.ignite.Ignition.start(Ignition.java:352)
[ignite-core-2.6.0.jar:2.6.0]
	at
org.apache.ignite.startup.cmdline.CommandLineStartup.main(CommandLineStartup.java:301)
[ignite-core-2.6.0.jar:2.6.0]
Caused by: org.apache.ignite.spi.IgniteSpiException: Thread has been
interrupted.
	at
org.apache.ignite.spi.discovery.tcp.ServerImpl.joinTopology(ServerImpl.java:938)
~[ignite-core-2.6.0.jar:2.6.0]
	at
org.apache.ignite.spi.discovery.tcp.ServerImpl.spiStart(ServerImpl.java:373)
~[ignite-core-2.6.0.jar:2.6.0]
	at
org.apache.ignite.spi.discovery.tcp.TcpDiscoverySpi.spiStart(TcpDiscoverySpi.java:1948)
~[ignite-core-2.6.0.jar:2.6.0]
	at
org.apache.ignite.internal.managers.GridManagerAdapter.startSpi(GridManagerAdapter.java:297)
~[ignite-core-2.6.0.jar:2.6.0]
	... 13 more
[2018-07-24T02:57:40,336][ERROR][tcp-disco-msg-worker-#3][] JVM will be
halted immediately due to the failure: [failureCtx=FailureContext
[type=SYSTEM_WORKER_TERMINATION, err=class o.a.i.IgniteException: Node with
BaselineTopology cannot join mixed cluster running in compatibility mode]]
[2018-07-24T02:57:40,335][ERROR][main][IgniteKernal] Got exception while
starting (will rollback startup routine).
org.apache.ignite.IgniteCheckedException: Failed to start manager:
GridManagerAdapter [enabled=true,
name=org.apache.ignite.internal.managers.discovery.GridDiscoveryManager]
	at
org.apache.ignite.internal.IgniteKernal.startManager(IgniteKernal.java:1726)
~[ignite-core-2.6.0.jar:2.6.0]
	at org.apache.ignite.internal.IgniteKernal.start(IgniteKernal.java:1028)
[ignite-core-2.6.0.jar:2.6.0]
	at
org.apache.ignite.internal.IgnitionEx$IgniteNamedInstance.start0(IgnitionEx.java:2014)
[ignite-core-2.6.0.jar:2.6.0]
	at
org.apache.ignite.internal.IgnitionEx$IgniteNamedInstance.start(IgnitionEx.java:1723)
[ignite-core-2.6.0.jar:2.6.0]
	at org.apache.ignite.internal.IgnitionEx.start0(IgnitionEx.java:1151)
[ignite-core-2.6.0.jar:2.6.0]
	at
org.apache.ignite.internal.IgnitionEx.startConfigurations(IgnitionEx.java:1069)
[ignite-core-2.6.0.jar:2.6.0]
	at org.apache.ignite.internal.IgnitionEx.start(IgnitionEx.java:955)
[ignite-core-2.6.0.jar:2.6.0]
	at org.apache.ignite.internal.IgnitionEx.start(IgnitionEx.java:854)
[ignite-core-2.6.0.jar:2.6.0]
	at org.apache.ignite.internal.IgnitionEx.start(IgnitionEx.java:724)
[ignite-core-2.6.0.jar:2.6.0]
	at org.apache.ignite.internal.IgnitionEx.start(IgnitionEx.java:693)
[ignite-core-2.6.0.jar:2.6.0]
	at org.apache.ignite.Ignition.start(Ignition.java:352)
[ignite-core-2.6.0.jar:2.6.0]
	at
org.apache.ignite.startup.cmdline.CommandLineStartup.main(CommandLineStartup.java:301)
[ignite-core-2.6.0.jar:2.6.0]
Caused by: org.apache.ignite.IgniteCheckedException: Failed to start SPI:
TcpDiscoverySpi [addrRslvr=null, sockTimeout=5000, ackTimeout=5000,
marsh=JdkMarshaller
[clsFilter=org.apache.ignite.marshaller.MarshallerUtils$1@64a9d48c],
reconCnt=10, reconDelay=2000, maxAckTimeout=600000, forceSrvMode=false,
clientReconnectDisabled=false, internalLsnr=null]
	at
org.apache.ignite.internal.managers.GridManagerAdapter.startSpi(GridManagerAdapter.java:300)
~[ignite-core-2.6.0.jar:2.6.0]
	at
org.apache.ignite.internal.managers.discovery.GridDiscoveryManager.start(GridDiscoveryManager.java:915)
~[ignite-core-2.6.0.jar:2.6.0]
	at
org.apache.ignite.internal.IgniteKernal.startManager(IgniteKernal.java:1721)
~[ignite-core-2.6.0.jar:2.6.0]
	... 11 more
Caused by: org.apache.ignite.spi.IgniteSpiException: Thread has been
interrupted.
	at
org.apache.ignite.spi.discovery.tcp.ServerImpl.joinTopology(ServerImpl.java:938)
~[ignite-core-2.6.0.jar:2.6.0]
	at
org.apache.ignite.spi.discovery.tcp.ServerImpl.spiStart(ServerImpl.java:373)
~[ignite-core-2.6.0.jar:2.6.0]
	at
org.apache.ignite.spi.discovery.tcp.TcpDiscoverySpi.spiStart(TcpDiscoverySpi.java:1948)
~[ignite-core-2.6.0.jar:2.6.0]
	at
org.apache.ignite.internal.managers.GridManagerAdapter.startSpi(GridManagerAdapter.java:297)
~[ignite-core-2.6.0.jar:2.6.0]
	at
org.apache.ignite.internal.managers.discovery.GridDiscoveryManager.start(GridDiscoveryManager.java:915)
~[ignite-core-2.6.0.jar:2.6.0]
	at
org.apache.ignite.internal.IgniteKernal.startManager(IgniteKernal.java:1721)
~[ignite-core-2.6.0.jar:2.6.0]
	... 11 more


The next step I tried was to stop node1 and node3, then start node2, node1 and node3.
The cluster came up, but after a while this message appeared in the log: "Partition
states validation has failed", and the cluster started to rebalance.

I think the data and partition information on node2 is outdated, because while
node2 was down, new data was ingested into node1 and node3.

But after a while this message appears in the log: "[WARN
][exchange-worker-#162][GridDhtPartitionsExchangeFuture] Unable to await
partitions release latch within timeout: ServerLatch [permits=2,
pendingAcks=[0f687998-bb44-4d6c-8cac-c3dd05b28b9b,
f1eaed2f-d2b3-429f-8e97-ac150e106d3e], super=CompletableLatch [id=exchange,
topVer=AffinityTopologyVersion [topVer=4, minorTopVer=0]]]" and the cluster
freezes.
Here's the detailed log.

second.log
<http://apache-ignite-users.70518.x6.nabble.com/file/t1346/second.log>  

I had to stop the cluster and try to reboot again.
The cluster came up, and I ingested some data into Ignite using the Spark DataFrame
API and triggered a checkpoint.
The error message was the same as on my second try, and the cluster froze
again.
Here's the detailed log.
third.log
<http://apache-ignite-users.70518.x6.nabble.com/file/t1346/third.log>  

Then I tried to reboot again.
Now the cluster is in an unrecoverable state.
Here's the detailed log.
fourth.log
<http://apache-ignite-users.70518.x6.nabble.com/file/t1346/fourth.log>  

And these are the files in the checkpoint folder.
ll -t
/data/ignite/persistence/node00-33503bf4-323b-4965-8bb1-31597d3bedf4/cp
total 232
-rw------- 1 root root 16 Jul 24 05:15
1532409300819-4c35d510-f4ef-4a9d-8311-91afbe08cdb2-START.bin
-rw------- 1 root root 16 Jul 24 05:14 1532409286225-node-started.bin
-rw------- 1 root root 16 Jul 24 05:13 1532409199626-node-started.bin
-rw------- 1 root root 16 Jul 24 05:13
1532409179923-e8332756-9efc-4baa-b854-722d858fe879-END.bin
-rw------- 1 root root 16 Jul 24 05:12
1532409179923-e8332756-9efc-4baa-b854-722d858fe879-START.bin
-rw------- 1 root root 16 Jul 24 05:12 1532409166705-node-started.bin
-rw------- 1 root root 16 Jul 24 04:30 1532406650280-node-started.bin
-rw------- 1 root root 16 Jul 24 04:30
1532406379589-0f14ace1-476c-4751-9237-c32b44409348-END.bin
-rw------- 1 root root 16 Jul 24 04:26
1532406379589-0f14ace1-476c-4751-9237-c32b44409348-START.bin
-rw------- 1 root root 16 Jul 24 04:20
1532405902580-4f0d9446-df20-4327-89f5-66db3c3d231d-END.bin
-rw------- 1 root root 16 Jul 24 04:18
1532405902580-4f0d9446-df20-4327-89f5-66db3c3d231d-START.bin
-rw------- 1 root root 16 Jul 24 04:11
1532405301597-7ed8ec28-4761-431b-a702-3401db38e43e-END.bin
-rw------- 1 root root 16 Jul 24 04:08
1532405301597-7ed8ec28-4761-431b-a702-3401db38e43e-START.bin
-rw------- 1 root root 16 Jul 24 04:05
1532404700308-38bfb4f8-ffe2-4fca-a4ac-488146fb8bc6-END.bin
-rw------- 1 root root 16 Jul 24 03:58
1532404700308-38bfb4f8-ffe2-4fca-a4ac-488146fb8bc6-START.bin
-rw------- 1 root root 16 Jul 24 03:48
1532404100330-20d1e18a-6c2b-468b-8058-913140a73778-END.bin
-rw------- 1 root root 16 Jul 24 03:48
1532404100330-20d1e18a-6c2b-468b-8058-913140a73778-START.bin
-rw------- 1 root root 16 Jul 24 03:38
1532403500142-500c576d-f6b6-4677-aa20-b2ea27275241-END.bin
-rw------- 1 root root 16 Jul 24 03:38
1532403500142-500c576d-f6b6-4677-aa20-b2ea27275241-START.bin
-rw------- 1 root root 16 Jul 24 03:28
1532402900158-21ff15b0-f404-402a-8ae3-7daf68b0bd7c-END.bin
-rw------- 1 root root 16 Jul 24 03:28
1532402900158-21ff15b0-f404-402a-8ae3-7daf68b0bd7c-START.bin
-rw------- 1 root root 16 Jul 24 03:18
1532402300056-a6109747-fe76-4b6b-869c-ea7529ee42fb-END.bin
-rw------- 1 root root 16 Jul 24 03:18
1532402300056-a6109747-fe76-4b6b-869c-ea7529ee42fb-START.bin
-rw------- 1 root root 16 Jul 24 03:17 1532402278723-node-started.bin
-rw------- 1 root root 16 Jul 24 03:17
1532401832758-43fc7608-a11e-4176-a879-37a1ee0f9f37-END.bin
-rw------- 1 root root 16 Jul 24 03:10
1532401832758-43fc7608-a11e-4176-a879-37a1ee0f9f37-START.bin
-rw------- 1 root root 16 Jul 24 03:00
1532401232746-5f6483fe-4d1b-4426-8e25-28d3dc1b156b-END.bin
-rw------- 1 root root 16 Jul 24 03:00
1532401232746-5f6483fe-4d1b-4426-8e25-28d3dc1b156b-START.bin
-rw------- 1 root root 16 Jul 24 03:00 1532401217918-node-started.bin
-rw------- 1 root root 16 Jul 23 19:29
1532374156904-fb69dc16-1947-411c-bc5e-ee6540ba8e53-END.bin
-rw------- 1 root root 16 Jul 23 19:29
1532374156904-fb69dc16-1947-411c-bc5e-ee6540ba8e53-START.bin
-rw------- 1 root root 16 Jul 23 17:19
1532366356784-bf07f598-0aa4-4503-a594-336b8eccbdbe-END.bin
-rw------- 1 root root 16 Jul 23 17:19
1532366356784-bf07f598-0aa4-4503-a594-336b8eccbdbe-START.bin
-rw------- 1 root root 16 Jul 23 17:09
1532365756812-ff66bf97-c4fd-4ae3-9aa0-872fb09eb610-END.bin
-rw------- 1 root root 16 Jul 23 17:09
1532365756812-ff66bf97-c4fd-4ae3-9aa0-872fb09eb610-START.bin
-rw------- 1 root root 16 Jul 23 16:59
1532365156819-9774fab8-da28-4188-80ee-a5753668b52a-END.bin
-rw------- 1 root root 16 Jul 23 16:59
1532365156819-9774fab8-da28-4188-80ee-a5753668b52a-START.bin
-rw------- 1 root root 16 Jul 23 16:49
1532364556960-64ee6f39-dc21-4fa2-b472-c00d3d738a87-END.bin
-rw------- 1 root root 16 Jul 23 16:49
1532364556960-64ee6f39-dc21-4fa2-b472-c00d3d738a87-START.bin
-rw------- 1 root root 16 Jul 23 16:39
1532363956827-d9a3189d-586f-437f-9d86-463bdb23ae1f-END.bin
-rw------- 1 root root 16 Jul 23 16:39
1532363956827-d9a3189d-586f-437f-9d86-463bdb23ae1f-START.bin



--
Sent from: http://apache-ignite-users.70518.x6.nabble.com/

Re: Ignite data can't be recovered after node fail

Posted by Evgenii Zhuravlev <e....@gmail.com>.
Hi,

Do you have a daemon node started (i.e. Visor)? The message "Node with
BaselineTopology cannot join mixed cluster running in compatibility mode"
looks like it is related to this issue:
https://issues.apache.org/jira/browse/IGNITE-8774

Evgenii

2018-07-24 11:10 GMT+03:00 Ray <ra...@cisco.com>:

> Following node fail described in this thread
> http://apache-ignite-users.70518.x6.nabble.com/Ignite-
> node-failed-for-no-obvious-reason-td22866.html,
> I tried to reboot this node and recover the data to make Ignite cluster
> available again.
>
> First, I try reboot node2 directly but failed.
> The node log is as follows.
>
> [2018-07-24T02:57:38,956][INFO ][main][IgniteKernal]
>
> >>>    __________  ________________
> >>>   /  _/ ___/ |/ /  _/_  __/ __/
> >>>  _/ // (7 7    // /  / / / _/
> >>> /___/\___/_/|_/___/ /_/ /___/
> >>>
> >>> ver. 2.6.0#20180710-sha1:669feacc
> >>> 2018 Copyright(C) Apache Software Foundation
> >>>
> >>> Ignite documentation: http://ignite.apache.org
>
> [2018-07-24T02:57:38,976][INFO ][main][IgniteKernal] Config URL:
> file:/opt/apache-ignite-fabric-2.6.0-bin/config/persistent-config.xml
> [2018-07-24T02:57:38,984][INFO ][main][IgniteKernal] IgniteConfiguration
> [igniteInstanceName=null, pubPoolSize=56, svcPoolSize=56,
> callbackPoolSize=56, stripedPoolSize=56, sysPoolSize=56, mgmtPoolSize=4,
> igfsPoolSize=56, dataStreamerPoolSize=56, utilityCachePoolSize=56,
> utilityCacheKeepAliveTime=60000, p2pPoolSize=2, qryPoolSize=56,
> igniteHome=/opt/apache-ignite-fabric-2.6.0-bin,
> igniteWorkDir=/opt/apache-ignite-fabric-2.6.0-bin/work,
> mbeanSrv=com.sun.jmx.mbeanserver.JmxMBeanServer@6f94fa3e,
> nodeId=7e3c0623-a6a5-4a7b-966e-6882b86ff922,
> marsh=org.apache.ignite.internal.binary.BinaryMarshaller@1890516e,
> marshLocJobs=false, daemon=false, p2pEnabled=true, netTimeout=5000,
> sndRetryDelay=1000, sndRetryCnt=3, metricsHistSize=10000,
> metricsUpdateFreq=2000, metricsExpTime=9223372036854775807,
> discoSpi=TcpDiscoverySpi [addrRslvr=null, sockTimeout=0, ackTimeout=0,
> marsh=null, reconCnt=10, reconDelay=2000, maxAckTimeout=600000,
> forceSrvMode=false, clientReconnectDisabled=false, internalLsnr=null],
> segPlc=RESTART_JVM, segResolveAttempts=2, waitForSegOnStart=true,
> allResolversPassReq=true, segChkFreq=10000, commSpi=TcpCommunicationSpi
> [connectGate=null, connPlc=null, enableForcibleNodeKill=false,
> enableTroubleshootingLog=false,
> srvLsnr=org.apache.ignite.spi.communication.tcp.
> TcpCommunicationSpi$2@42e25b0b,
> locAddr=null, locHost=null, locPort=47100, locPortRange=100, shmemPort=-1,
> directBuf=true, directSndBuf=false, idleConnTimeout=600000,
> connTimeout=5000, maxConnTimeout=600000, reconCnt=10, sockSndBuf=32768,
> sockRcvBuf=32768, msgQueueLimit=0, slowClientQueueLimit=0, nioSrvr=null,
> shmemSrv=null, usePairedConnections=false, connectionsPerNode=1,
> tcpNoDelay=true, filterReachableAddresses=false, ackSndThreshold=32,
> unackedMsgsBufSize=0, sockWriteTimeout=2000, lsnr=null, boundTcpPort=-1,
> boundTcpShmemPort=-1, selectorsCnt=28, selectorSpins=0, addrRslvr=null,
> ctxInitLatch=java.util.concurrent.CountDownLatch@39b43d60[Count = 1],
> stopping=false,
> metricsLsnr=org.apache.ignite.spi.communication.tcp.
> TcpCommunicationMetricsListener@44be0077],
> evtSpi=org.apache.ignite.spi.eventstorage.NoopEventStorageSpi@2205a05d,
> colSpi=NoopCollisionSpi [], deploySpi=LocalDeploymentSpi [lsnr=null],
> indexingSpi=org.apache.ignite.spi.indexing.noop.NoopIndexingSpi@5f20155b,
> addrRslvr=null, clientMode=false, rebalanceThreadPoolSize=1,
> txCfg=org.apache.ignite.configuration.TransactionConfiguration@72ade7e3,
> cacheSanityCheckEnabled=true, discoStartupDelay=60000, deployMode=SHARED,
> p2pMissedCacheSize=100, locHost=null, timeSrvPortBase=31100,
> timeSrvPortRange=100, failureDetectionTimeout=60000,
> clientFailureDetectionTimeout=30000, metricsLogFreq=60000, hadoopCfg=null,
> connectorCfg=org.apache.ignite.configuration.ConnectorConfiguration@
> 239105a8,
> odbcCfg=null, warmupClos=null, atomicCfg=AtomicConfiguration
> [seqReserveSize=1000, cacheMode=PARTITIONED, backups=1, aff=null,
> grpName=null], classLdr=null, sslCtxFactory=null, platformCfg=null,
> binaryCfg=null, memCfg=null, pstCfg=null, dsCfg=DataStorageConfiguration
> [sysRegionInitSize=41943040, sysCacheMaxSize=104857600, pageSize=0,
> concLvl=0, dfltDataRegConf=DataRegionConfiguration [name=default_Region,
> maxSize=493921239040, initSize=107374182400, swapPath=null,
> pageEvictionMode=DISABLED, evictionThreshold=0.9, emptyPagesPoolSize=100,
> metricsEnabled=false, metricsSubIntervalCount=5,
> metricsRateTimeInterval=60000, persistenceEnabled=true,
> checkpointPageBufSize=8589934592], storagePath=/data/ignite/persistence,
> checkpointFreq=600000, lockWaitTime=10000, checkpointThreads=4,
> checkpointWriteOrder=SEQUENTIAL, walHistSize=20, walSegments=10,
> walSegmentSize=67108864, walPath=/wal, walArchivePath=/wal/archive,
> metricsEnabled=false, walMode=BACKGROUND, walTlbSize=131072, walBuffSize=0,
> walFlushFreq=5000, walFsyncDelay=1000, walRecordIterBuffSize=67108864,
> alwaysWriteFullPages=false,
> fileIOFactory=org.apache.ignite.internal.processors.
> cache.persistence.file.AsyncFileIOFactory@609bcfb6,
> metricsSubIntervalCnt=5, metricsRateTimeInterval=60000,
> walAutoArchiveAfterInactivity=-1, writeThrottlingEnabled=false,
> walCompactionEnabled=false], activeOnStart=true, autoActivation=true,
> longQryWarnTimeout=3000, sqlConnCfg=null,
> cliConnCfg=ClientConnectorConfiguration [host=null, port=10800,
> portRange=100, sockSndBufSize=0, sockRcvBufSize=0, tcpNoDelay=true,
> maxOpenCursorsPerConn=128, threadPoolSize=56, idleTimeout=0,
> jdbcEnabled=true, odbcEnabled=true, thinCliEnabled=true, sslEnabled=false,
> useIgniteSslCtxFactory=true, sslClientAuth=false, sslCtxFactory=null],
> authEnabled=false, failureHnd=null, commFailureRslvr=null]
> [2018-07-24T02:57:38,984][INFO ][main][IgniteKernal] Daemon mode: off
> [2018-07-24T02:57:38,985][INFO ][main][IgniteKernal] OS: Linux
> 2.6.32-696.16.1.el6.x86_64 amd64
> [2018-07-24T02:57:38,985][INFO ][main][IgniteKernal] OS user: root
> [2018-07-24T02:57:38,985][INFO ][main][IgniteKernal] PID: 49525
> [2018-07-24T02:57:38,985][INFO ][main][IgniteKernal] Language runtime: Java
> Platform API Specification ver. 1.8
> [2018-07-24T02:57:38,985][INFO ][main][IgniteKernal] VM information:
> OpenJDK
> Runtime Environment 1.8.0_161-b14 Oracle Corporation OpenJDK 64-Bit Server
> VM 25.161-b14
> [2018-07-24T02:57:38,986][INFO ][main][IgniteKernal] VM total memory:
> 31.0GB
> [2018-07-24T02:57:38,986][INFO ][main][IgniteKernal] Remote Management
> [restart: on, REST: on, JMX (remote: on, port: 49112, auth: off, ssl: off)]
> [2018-07-24T02:57:38,987][INFO ][main][IgniteKernal] Logger: Log4J2Logger
> [quiet=true, config=config/ignite-log4j2.xml]
> [2018-07-24T02:57:38,987][INFO ][main][IgniteKernal]
> IGNITE_HOME=/opt/apache-ignite-fabric-2.6.0-bin
> [2018-07-24T02:57:38,987][INFO ][main][IgniteKernal] VM arguments: [-Xms1g,
> -Xmx1g, -XX:+AggressiveOpts, -XX:MaxMetaspaceSize=256m,
> -DIGNITE_QUIET=true,
> -DIGNITE_SUCCESS_FILE=/opt/apache-ignite-fabric-2.6.0-
> bin/work/ignite_success_2fd90195-cb52-4762-9732-3f1366e2e9cb,
> -Dcom.sun.management.jmxremote, -Dcom.sun.management.jmxremote.port=49112,
> -Dcom.sun.management.jmxremote.authenticate=false,
> -Dcom.sun.management.jmxremote.ssl=false,
> -DIGNITE_HOME=/opt/apache-ignite-fabric-2.6.0-bin,
> -DIGNITE_PROG_NAME=./ignite.sh, -Xmx32000m, -Xms32000m, -XX:+UseG1GC,
> -XX:+ScavengeBeforeFullGC, -XX:+DisableExplicitGC, -XX:+AlwaysPreTouch,
> -XX:+PrintGCDetails, -XX:+PrintGCTimeStamps, -XX:+PrintGCDateStamps,
> -XX:+PrintAdaptiveSizePolicy,
> -Xloggc:/spare/ignite/log/ignitegc-2018_07_24-02_57.log]
> [2018-07-24T02:57:38,987][INFO ][main][IgniteKernal] System cache's
> DataRegion size is configured to 40 MB. Use
> DataStorageConfiguration.systemCacheMemorySize property to change the
> setting.
> [2018-07-24T02:57:38,993][INFO ][main][IgniteKernal] Configured caches [in
> 'sysMemPlc' dataRegion: ['ignite-sys-cache']]
> [2018-07-24T02:57:38,993][WARN ][main][IgniteKernal] Peer class loading is
> enabled (disable it in production for performance and deployment
> consistency
> reasons)
> [2018-07-24T02:57:38,996][INFO ][main][IgniteKernal] 3-rd party licenses
> can
> be found at: /opt/apache-ignite-fabric-2.6.0-bin/libs/licenses
> [2018-07-24T02:57:39,070][INFO ][main][IgnitePluginProcessor] Configured
> plugins:
> [2018-07-24T02:57:39,071][INFO ][main][IgnitePluginProcessor]   ^-- None
> [2018-07-24T02:57:39,071][INFO ][main][IgnitePluginProcessor]
> [2018-07-24T02:57:39,072][INFO ][main][FailureProcessor] Configured failure
> handler: [hnd=StopNodeOrHaltFailureHandler [tryStop=false, timeout=0]]
> [2018-07-24T02:57:39,109][INFO ][main][TcpCommunicationSpi] Successfully
> bound communication NIO server to TCP port [port=47100,
> locHost=0.0.0.0/0.0.0.0, selectorsCnt=28, selectorSpins=0,
> pairedConn=false]
> [2018-07-24T02:57:39,110][WARN ][main][TcpCommunicationSpi] Message queue
> limit is set to 0 which may lead to potential OOMEs when running cache
> operations in FULL_ASYNC or PRIMARY_SYNC modes due to message queues growth
> on sender and receiver sides.
> [2018-07-24T02:57:39,126][WARN ][main][NoopCheckpointSpi] Checkpoints are
> disabled (to enable configure any GridCheckpointSpi implementation)
> [2018-07-24T02:57:39,145][WARN ][main][GridCollisionManager] Collision
> resolution is disabled (all jobs will be activated upon arrival).
> [2018-07-24T02:57:39,146][INFO ][main][IgniteKernal] Security status
> [authentication=off, tls/ssl=off]
> [2018-07-24T02:57:39,171][INFO ][main][TcpDiscoverySpi] Successfully bound
> to TCP port [port=49500, localHost=0.0.0.0/0.0.0.0,
> locNodeId=7e3c0623-a6a5-4a7b-966e-6882b86ff922]
> [2018-07-24T02:57:39,178][INFO ][main][PdsFoldersResolver] Successfully
> locked persistence storage folder
> [/data/ignite/persistence/node00-33503bf4-323b-4965-8bb1-31597d3bedf4]
> [2018-07-24T02:57:39,178][INFO ][main][PdsFoldersResolver] Consistent ID
> used for local node is [33503bf4-323b-4965-8bb1-31597d3bedf4] according to
> persistence data storage folders
> [2018-07-24T02:57:39,178][INFO ][main][CacheObjectBinaryProcessorImpl]
> Resolved directory for serialized binary metadata:
> /opt/apache-ignite-fabric-2.6.0-bin/work/binary_meta/node00-
> 33503bf4-323b-4965-8bb1-31597d3bedf4
> [2018-07-24T02:57:39,361][INFO ][main][FilePageStoreManager] Resolved page
> store work directory:
> /data/ignite/persistence/node00-33503bf4-323b-4965-8bb1-31597d3bedf4
> [2018-07-24T02:57:39,361][INFO ][main][FileWriteAheadLogManager] Resolved
> write ahead log work directory:
> /wal/node00-33503bf4-323b-4965-8bb1-31597d3bedf4
> [2018-07-24T02:57:39,361][INFO ][main][FileWriteAheadLogManager] Resolved
> write ahead log archive directory:
> /wal/archive/node00-33503bf4-323b-4965-8bb1-31597d3bedf4
> [2018-07-24T02:57:39,422][INFO ][main][FileWriteAheadLogManager] Started
> write-ahead log manager [mode=BACKGROUND]
> [2018-07-24T02:57:39,454][INFO ][main][GridCacheDatabaseSharedManager]
> Read
> checkpoint status
> [startMarker=/data/ignite/persistence/node00-33503bf4-
> 323b-4965-8bb1-31597d3bedf4/cp/1532374156904-fb69dc16-
> 1947-411c-bc5e-ee6540ba8e53-START.bin,
> endMarker=/data/ignite/persistence/node00-33503bf4-
> 323b-4965-8bb1-31597d3bedf4/cp/1532374156904-fb69dc16-
> 1947-411c-bc5e-ee6540ba8e53-END.bin]
> [2018-07-24T02:57:39,465][INFO ][main][PageMemoryImpl] Started page memory
> [memoryAllocated=100.0 MiB, pages=24808, tableSize=1.9 MiB,
> checkpointBuffer=100.0 MiB]
> [2018-07-24T02:57:39,465][INFO ][main][GridCacheDatabaseSharedManager]
> Checking memory state [lastValidPos=FileWALPointer [idx=21672,
> fileOff=13729571, len=27723], lastMarked=FileWALPointer [idx=21672,
> fileOff=13729571, len=27723],
> lastCheckpointId=fb69dc16-1947-411c-bc5e-ee6540ba8e53]
> [2018-07-24T02:57:39,544][WARN ][main][FileWriteAheadLogManager] WAL
> segment
> tail is reached. [ Expected next state: {Index=21672,Offset=13757294},
> Actual state : {Index=3690196541643296009,Offset=603992117} ]
> [2018-07-24T02:57:39,544][INFO ][main][GridCacheDatabaseSharedManager]
> Found
> last checkpoint marker [cpId=fb69dc16-1947-411c-bc5e-ee6540ba8e53,
> pos=FileWALPointer [idx=21672, fileOff=13729571, len=27723]]
> [2018-07-24T02:57:39,569][INFO ][main][GridCacheDatabaseSharedManager]
> Applying lost cache updates since last checkpoint record
> [lastMarked=FileWALPointer [idx=21672, fileOff=13729571, len=27723],
> lastCheckpointId=fb69dc16-1947-411c-bc5e-ee6540ba8e53]
> [2018-07-24T02:57:39,628][WARN ][main][FileWriteAheadLogManager] WAL
> segment
> tail is reached. [ Expected next state: {Index=21672,Offset=13757294},
> Actual state : {Index=3690196541643296009,Offset=603992117} ]
> [2018-07-24T02:57:39,629][INFO ][main][GridCacheDatabaseSharedManager]
> Finished applying WAL changes [updatesApplied=0, time=61ms]
> [2018-07-24T02:57:39,666][INFO ][main][GridClusterStateProcessor]
> Restoring
> history for BaselineTopology[id=0]
> [2018-07-24T02:57:39,752][INFO ][main][ClientListenerProcessor] Client
> connector processor has started on TCP port 10800
> [2018-07-24T02:57:39,790][INFO ][main][GridTcpRestProtocol] Command
> protocol
> successfully started [name=TCP binary, host=0.0.0.0/0.0.0.0, port=11211]
> [2018-07-24T02:57:39,939][INFO ][main][GridJettyRestProtocol] Command
> protocol successfully started [name=Jetty REST, host=/0.0.0.0, port=8080]
> [2018-07-24T02:57:39,977][INFO ][main][IgniteKernal] Non-loopback local
> IPs:
> 10.252.10.4
> [2018-07-24T02:57:39,977][INFO ][main][IgniteKernal] Enabled local MACs:
> 7079B364407D
> [2018-07-24T02:57:40,098][INFO ][tcp-disco-srvr-#2][TcpDiscoverySpi] TCP
> discovery accepted incoming connection [rmtAddr=/10.252.10.20,
> rmtPort=51032]
> [2018-07-24T02:57:40,107][INFO ][tcp-disco-srvr-#2][TcpDiscoverySpi] TCP
> discovery spawning a new thread for connection [rmtAddr=/10.252.10.20,
> rmtPort=51032]
> [2018-07-24T02:57:40,107][INFO ][tcp-disco-sock-reader-#4][
> TcpDiscoverySpi]
> Started serving remote node connection [rmtAddr=/10.252.10.20:51032,
> rmtPort=51032]
> [2018-07-24T02:57:40,329][ERROR][tcp-disco-msg-worker-#3][TcpDiscoverySpi]
> TcpDiscoverSpi's message worker thread failed abnormally. Stopping the node
> in order to prevent cluster wide instability.
> org.apache.ignite.IgniteException: Node with BaselineTopology cannot join
> mixed cluster running in compatibility mode
>         at
> org.apache.ignite.internal.processors.cluster.GridClusterStateProcessor.
> onGridDataReceived(GridClusterStateProcessor.java:714)
> ~[ignite-core-2.6.0.jar:2.6.0]
>         at
> org.apache.ignite.internal.managers.discovery.GridDiscoveryManager$5.
> onExchange(GridDiscoveryManager.java:883)
> ~[ignite-core-2.6.0.jar:2.6.0]
>         at
> org.apache.ignite.spi.discovery.tcp.TcpDiscoverySpi.
> onExchange(TcpDiscoverySpi.java:1939)
> ~[ignite-core-2.6.0.jar:2.6.0]
>         at
> org.apache.ignite.spi.discovery.tcp.ServerImpl$RingMessageWorker.
> processNodeAddedMessage(ServerImpl.java:4354)
> ~[ignite-core-2.6.0.jar:2.6.0]
>         at
> org.apache.ignite.spi.discovery.tcp.ServerImpl$RingMessageWorker.
> processMessage(ServerImpl.java:2744)
> ~[ignite-core-2.6.0.jar:2.6.0]
>         at
> org.apache.ignite.spi.discovery.tcp.ServerImpl$RingMessageWorker.
> processMessage(ServerImpl.java:2536)
> ~[ignite-core-2.6.0.jar:2.6.0]
>         at
> org.apache.ignite.spi.discovery.tcp.ServerImpl$MessageWorkerAdapter.body(
> ServerImpl.java:6775)
> ~[ignite-core-2.6.0.jar:2.6.0]
>         at
> org.apache.ignite.spi.discovery.tcp.ServerImpl$RingMessageWorker.body(
> ServerImpl.java:2621)
> [ignite-core-2.6.0.jar:2.6.0]
>         at org.apache.ignite.spi.IgniteSpiThread.run(
> IgniteSpiThread.java:62)
> [ignite-core-2.6.0.jar:2.6.0]
> [2018-07-24T02:57:40,333][ERROR][tcp-disco-msg-worker-#3][] Critical
> system
> error detected. Will be handled accordingly to configured handler
> [hnd=class
> o.a.i.failure.StopNodeOrHaltFailureHandler, failureCtx=FailureContext
> [type=SYSTEM_WORKER_TERMINATION, err=class o.a.i.IgniteException: Node
> with
> BaselineTopology cannot join mixed cluster running in compatibility mode]]
> org.apache.ignite.IgniteException: Node with BaselineTopology cannot join
> mixed cluster running in compatibility mode
>         at
> org.apache.ignite.internal.processors.cluster.GridClusterStateProcessor.
> onGridDataReceived(GridClusterStateProcessor.java:714)
> ~[ignite-core-2.6.0.jar:2.6.0]
>         at
> org.apache.ignite.internal.managers.discovery.GridDiscoveryManager$5.
> onExchange(GridDiscoveryManager.java:883)
> ~[ignite-core-2.6.0.jar:2.6.0]
>         at
> org.apache.ignite.spi.discovery.tcp.TcpDiscoverySpi.
> onExchange(TcpDiscoverySpi.java:1939)
> ~[ignite-core-2.6.0.jar:2.6.0]
>         at
> org.apache.ignite.spi.discovery.tcp.ServerImpl$RingMessageWorker.
> processNodeAddedMessage(ServerImpl.java:4354)
> ~[ignite-core-2.6.0.jar:2.6.0]
>         at
> org.apache.ignite.spi.discovery.tcp.ServerImpl$RingMessageWorker.
> processMessage(ServerImpl.java:2744)
> ~[ignite-core-2.6.0.jar:2.6.0]
>         at
> org.apache.ignite.spi.discovery.tcp.ServerImpl$RingMessageWorker.
> processMessage(ServerImpl.java:2536)
> ~[ignite-core-2.6.0.jar:2.6.0]
>         at
> org.apache.ignite.spi.discovery.tcp.ServerImpl$MessageWorkerAdapter.body(
> ServerImpl.java:6775)
> ~[ignite-core-2.6.0.jar:2.6.0]
>         at
> org.apache.ignite.spi.discovery.tcp.ServerImpl$RingMessageWorker.body(
> ServerImpl.java:2621)
> [ignite-core-2.6.0.jar:2.6.0]
>         at org.apache.ignite.spi.IgniteSpiThread.run(
> IgniteSpiThread.java:62)
> [ignite-core-2.6.0.jar:2.6.0]
> [2018-07-24T02:57:40,334][ERROR][main][IgniteKernal] Failed to start
> manager: GridManagerAdapter [enabled=true,
> name=o.a.i.i.managers.discovery.GridDiscoveryManager]
> org.apache.ignite.IgniteCheckedException: Failed to start SPI:
> TcpDiscoverySpi [addrRslvr=null, sockTimeout=5000, ackTimeout=5000,
> marsh=JdkMarshaller
> [clsFilter=org.apache.ignite.marshaller.MarshallerUtils$1@64a9d48c],
> reconCnt=10, reconDelay=2000, maxAckTimeout=600000, forceSrvMode=false,
> clientReconnectDisabled=false, internalLsnr=null]
>         at
> org.apache.ignite.internal.managers.GridManagerAdapter.
> startSpi(GridManagerAdapter.java:300)
> ~[ignite-core-2.6.0.jar:2.6.0]
>         at
> org.apache.ignite.internal.managers.discovery.GridDiscoveryManager.start(
> GridDiscoveryManager.java:915)
> ~[ignite-core-2.6.0.jar:2.6.0]
>         at
> org.apache.ignite.internal.IgniteKernal.startManager(
> IgniteKernal.java:1721)
> [ignite-core-2.6.0.jar:2.6.0]
>         at org.apache.ignite.internal.IgniteKernal.start(
> IgniteKernal.java:1028)
> [ignite-core-2.6.0.jar:2.6.0]
>         at
> org.apache.ignite.internal.IgnitionEx$IgniteNamedInstance.start0(
> IgnitionEx.java:2014)
> [ignite-core-2.6.0.jar:2.6.0]
>         at
> org.apache.ignite.internal.IgnitionEx$IgniteNamedInstance.start(
> IgnitionEx.java:1723)
> [ignite-core-2.6.0.jar:2.6.0]
>         at org.apache.ignite.internal.IgnitionEx.start0(IgnitionEx.
> java:1151)
> [ignite-core-2.6.0.jar:2.6.0]
>         at
> org.apache.ignite.internal.IgnitionEx.startConfigurations(
> IgnitionEx.java:1069)
> [ignite-core-2.6.0.jar:2.6.0]
>         at org.apache.ignite.internal.IgnitionEx.start(IgnitionEx.
> java:955)
> [ignite-core-2.6.0.jar:2.6.0]
>         at org.apache.ignite.internal.IgnitionEx.start(IgnitionEx.
> java:854)
> [ignite-core-2.6.0.jar:2.6.0]
>         at org.apache.ignite.internal.IgnitionEx.start(IgnitionEx.
> java:724)
> [ignite-core-2.6.0.jar:2.6.0]
>         at org.apache.ignite.internal.IgnitionEx.start(IgnitionEx.
> java:693)
> [ignite-core-2.6.0.jar:2.6.0]
>         at org.apache.ignite.Ignition.start(Ignition.java:352)
> [ignite-core-2.6.0.jar:2.6.0]
>         at
> org.apache.ignite.startup.cmdline.CommandLineStartup.
> main(CommandLineStartup.java:301)
> [ignite-core-2.6.0.jar:2.6.0]
> Caused by: org.apache.ignite.spi.IgniteSpiException: Thread has been
> interrupted.
>         at
> org.apache.ignite.spi.discovery.tcp.ServerImpl.
> joinTopology(ServerImpl.java:938)
> ~[ignite-core-2.6.0.jar:2.6.0]
>         at
> org.apache.ignite.spi.discovery.tcp.ServerImpl.
> spiStart(ServerImpl.java:373)
> ~[ignite-core-2.6.0.jar:2.6.0]
>         at
> org.apache.ignite.spi.discovery.tcp.TcpDiscoverySpi.
> spiStart(TcpDiscoverySpi.java:1948)
> ~[ignite-core-2.6.0.jar:2.6.0]
>         at
> org.apache.ignite.internal.managers.GridManagerAdapter.
> startSpi(GridManagerAdapter.java:297)
> ~[ignite-core-2.6.0.jar:2.6.0]
>         ... 13 more
> [2018-07-24T02:57:40,336][ERROR][tcp-disco-msg-worker-#3][] JVM will be
> halted immediately due to the failure: [failureCtx=FailureContext
> [type=SYSTEM_WORKER_TERMINATION, err=class o.a.i.IgniteException: Node
> with
> BaselineTopology cannot join mixed cluster running in compatibility mode]]
> [2018-07-24T02:57:40,335][ERROR][main][IgniteKernal] Got exception while
> starting (will rollback startup routine).
> org.apache.ignite.IgniteCheckedException: Failed to start manager:
> GridManagerAdapter [enabled=true,
> name=org.apache.ignite.internal.managers.discovery.GridDiscoveryManager]
>         at
> org.apache.ignite.internal.IgniteKernal.startManager(
> IgniteKernal.java:1726)
> ~[ignite-core-2.6.0.jar:2.6.0]
>         at org.apache.ignite.internal.IgniteKernal.start(
> IgniteKernal.java:1028)
> [ignite-core-2.6.0.jar:2.6.0]
>         at
> org.apache.ignite.internal.IgnitionEx$IgniteNamedInstance.start0(
> IgnitionEx.java:2014)
> [ignite-core-2.6.0.jar:2.6.0]
>         at
> org.apache.ignite.internal.IgnitionEx$IgniteNamedInstance.start(
> IgnitionEx.java:1723)
> [ignite-core-2.6.0.jar:2.6.0]
>         at org.apache.ignite.internal.IgnitionEx.start0(IgnitionEx.
> java:1151)
> [ignite-core-2.6.0.jar:2.6.0]
>         at
> org.apache.ignite.internal.IgnitionEx.startConfigurations(
> IgnitionEx.java:1069)
> [ignite-core-2.6.0.jar:2.6.0]
>         at org.apache.ignite.internal.IgnitionEx.start(IgnitionEx.
> java:955)
> [ignite-core-2.6.0.jar:2.6.0]
>         at org.apache.ignite.internal.IgnitionEx.start(IgnitionEx.
> java:854)
> [ignite-core-2.6.0.jar:2.6.0]
>         at org.apache.ignite.internal.IgnitionEx.start(IgnitionEx.
> java:724)
> [ignite-core-2.6.0.jar:2.6.0]
>         at org.apache.ignite.internal.IgnitionEx.start(IgnitionEx.
> java:693)
> [ignite-core-2.6.0.jar:2.6.0]
>         at org.apache.ignite.Ignition.start(Ignition.java:352)
> [ignite-core-2.6.0.jar:2.6.0]
>         at
> org.apache.ignite.startup.cmdline.CommandLineStartup.
> main(CommandLineStartup.java:301)
> [ignite-core-2.6.0.jar:2.6.0]
> Caused by: org.apache.ignite.IgniteCheckedException: Failed to start SPI:
> TcpDiscoverySpi [addrRslvr=null, sockTimeout=5000, ackTimeout=5000,
> marsh=JdkMarshaller
> [clsFilter=org.apache.ignite.marshaller.MarshallerUtils$1@64a9d48c],
> reconCnt=10, reconDelay=2000, maxAckTimeout=600000, forceSrvMode=false,
> clientReconnectDisabled=false, internalLsnr=null]
>         at
> org.apache.ignite.internal.managers.GridManagerAdapter.
> startSpi(GridManagerAdapter.java:300)
> ~[ignite-core-2.6.0.jar:2.6.0]
>         at
> org.apache.ignite.internal.managers.discovery.GridDiscoveryManager.start(
> GridDiscoveryManager.java:915)
> ~[ignite-core-2.6.0.jar:2.6.0]
>         at
> org.apache.ignite.internal.IgniteKernal.startManager(
> IgniteKernal.java:1721)
> ~[ignite-core-2.6.0.jar:2.6.0]
>         ... 11 more
> Caused by: org.apache.ignite.spi.IgniteSpiException: Thread has been
> interrupted.
>         at
> org.apache.ignite.spi.discovery.tcp.ServerImpl.
> joinTopology(ServerImpl.java:938)
> ~[ignite-core-2.6.0.jar:2.6.0]
>         at
> org.apache.ignite.spi.discovery.tcp.ServerImpl.
> spiStart(ServerImpl.java:373)
> ~[ignite-core-2.6.0.jar:2.6.0]
>         at
> org.apache.ignite.spi.discovery.tcp.TcpDiscoverySpi.
> spiStart(TcpDiscoverySpi.java:1948)
> ~[ignite-core-2.6.0.jar:2.6.0]
>         at
> org.apache.ignite.internal.managers.GridManagerAdapter.
> startSpi(GridManagerAdapter.java:297)
> ~[ignite-core-2.6.0.jar:2.6.0]
>         at
> org.apache.ignite.internal.managers.discovery.GridDiscoveryManager.start(
> GridDiscoveryManager.java:915)
> ~[ignite-core-2.6.0.jar:2.6.0]
>         at
> org.apache.ignite.internal.IgniteKernal.startManager(
> IgniteKernal.java:1721)
> ~[ignite-core-2.6.0.jar:2.6.0]
>         ... 11 more
>
>
> The next step I tried was to stop node1 and node3, then start
> node2, node1, and node3.
> The cluster is up but after a while there's this message in log "Partition
> states validation has failed" and the cluster starts to rebalance.
>
> I think the data and partition information in node2 is outdated because,
> while node2 was down, new data was ingested into node1 and node3.
>
> But after a while there's this message in log "[WARN
> ][exchange-worker-#162][GridDhtPartitionsExchangeFuture] Unable to await
> partitions release latch within timeout: ServerLatch [permits=2,
> pendingAcks=[0f687998-bb44-4d6c-8cac-c3dd05b28b9b,
> f1eaed2f-d2b3-429f-8e97-ac150e106d3e], super=CompletableLatch
> [id=exchange,
> topVer=AffinityTopologyVersion [topVer=4, minorTopVer=0]]]" and the cluster
> freezes.
> Here's the detailed log.
>
> second.log
> <http://apache-ignite-users.70518.x6.nabble.com/file/t1346/second.log>
>
> I had to stop the cluster and try to reboot again.
> The cluster came up, and I ingested some data into Ignite using the Spark
> DataFrame API and triggered a checkpoint.
> The error message was the same as on my second try, and the cluster froze
> again.
> Here's the detailed log.
> third.log
> <http://apache-ignite-users.70518.x6.nabble.com/file/t1346/third.log>
>
> When I tried to reboot again, the cluster ended up in an unrecoverable
> mode.
> Here's the detailed log.
> fourth.log
> <http://apache-ignite-users.70518.x6.nabble.com/file/t1346/fourth.log>
>
> And these are the files in the checkpoint folder.
> ll -t
> /data/ignite/persistence/node00-33503bf4-323b-4965-8bb1-31597d3bedf4/cp
> total 232
> -rw------- 1 root root 16 Jul 24 05:15
> 1532409300819-4c35d510-f4ef-4a9d-8311-91afbe08cdb2-START.bin
> -rw------- 1 root root 16 Jul 24 05:14 1532409286225-node-started.bin
> -rw------- 1 root root 16 Jul 24 05:13 1532409199626-node-started.bin
> -rw------- 1 root root 16 Jul 24 05:13
> 1532409179923-e8332756-9efc-4baa-b854-722d858fe879-END.bin
> -rw------- 1 root root 16 Jul 24 05:12
> 1532409179923-e8332756-9efc-4baa-b854-722d858fe879-START.bin
> -rw------- 1 root root 16 Jul 24 05:12 1532409166705-node-started.bin
> -rw------- 1 root root 16 Jul 24 04:30 1532406650280-node-started.bin
> -rw------- 1 root root 16 Jul 24 04:30
> 1532406379589-0f14ace1-476c-4751-9237-c32b44409348-END.bin
> -rw------- 1 root root 16 Jul 24 04:26
> 1532406379589-0f14ace1-476c-4751-9237-c32b44409348-START.bin
> -rw------- 1 root root 16 Jul 24 04:20
> 1532405902580-4f0d9446-df20-4327-89f5-66db3c3d231d-END.bin
> -rw------- 1 root root 16 Jul 24 04:18
> 1532405902580-4f0d9446-df20-4327-89f5-66db3c3d231d-START.bin
> -rw------- 1 root root 16 Jul 24 04:11
> 1532405301597-7ed8ec28-4761-431b-a702-3401db38e43e-END.bin
> -rw------- 1 root root 16 Jul 24 04:08
> 1532405301597-7ed8ec28-4761-431b-a702-3401db38e43e-START.bin
> -rw------- 1 root root 16 Jul 24 04:05
> 1532404700308-38bfb4f8-ffe2-4fca-a4ac-488146fb8bc6-END.bin
> -rw------- 1 root root 16 Jul 24 03:58
> 1532404700308-38bfb4f8-ffe2-4fca-a4ac-488146fb8bc6-START.bin
> -rw------- 1 root root 16 Jul 24 03:48
> 1532404100330-20d1e18a-6c2b-468b-8058-913140a73778-END.bin
> -rw------- 1 root root 16 Jul 24 03:48
> 1532404100330-20d1e18a-6c2b-468b-8058-913140a73778-START.bin
> -rw------- 1 root root 16 Jul 24 03:38
> 1532403500142-500c576d-f6b6-4677-aa20-b2ea27275241-END.bin
> -rw------- 1 root root 16 Jul 24 03:38
> 1532403500142-500c576d-f6b6-4677-aa20-b2ea27275241-START.bin
> -rw------- 1 root root 16 Jul 24 03:28
> 1532402900158-21ff15b0-f404-402a-8ae3-7daf68b0bd7c-END.bin
> -rw------- 1 root root 16 Jul 24 03:28
> 1532402900158-21ff15b0-f404-402a-8ae3-7daf68b0bd7c-START.bin
> -rw------- 1 root root 16 Jul 24 03:18
> 1532402300056-a6109747-fe76-4b6b-869c-ea7529ee42fb-END.bin
> -rw------- 1 root root 16 Jul 24 03:18
> 1532402300056-a6109747-fe76-4b6b-869c-ea7529ee42fb-START.bin
> -rw------- 1 root root 16 Jul 24 03:17 1532402278723-node-started.bin
> -rw------- 1 root root 16 Jul 24 03:17
> 1532401832758-43fc7608-a11e-4176-a879-37a1ee0f9f37-END.bin
> -rw------- 1 root root 16 Jul 24 03:10
> 1532401832758-43fc7608-a11e-4176-a879-37a1ee0f9f37-START.bin
> -rw------- 1 root root 16 Jul 24 03:00
> 1532401232746-5f6483fe-4d1b-4426-8e25-28d3dc1b156b-END.bin
> -rw------- 1 root root 16 Jul 24 03:00
> 1532401232746-5f6483fe-4d1b-4426-8e25-28d3dc1b156b-START.bin
> -rw------- 1 root root 16 Jul 24 03:00 1532401217918-node-started.bin
> -rw------- 1 root root 16 Jul 23 19:29
> 1532374156904-fb69dc16-1947-411c-bc5e-ee6540ba8e53-END.bin
> -rw------- 1 root root 16 Jul 23 19:29
> 1532374156904-fb69dc16-1947-411c-bc5e-ee6540ba8e53-START.bin
> -rw------- 1 root root 16 Jul 23 17:19
> 1532366356784-bf07f598-0aa4-4503-a594-336b8eccbdbe-END.bin
> -rw------- 1 root root 16 Jul 23 17:19
> 1532366356784-bf07f598-0aa4-4503-a594-336b8eccbdbe-START.bin
> -rw------- 1 root root 16 Jul 23 17:09
> 1532365756812-ff66bf97-c4fd-4ae3-9aa0-872fb09eb610-END.bin
> -rw------- 1 root root 16 Jul 23 17:09
> 1532365756812-ff66bf97-c4fd-4ae3-9aa0-872fb09eb610-START.bin
> -rw------- 1 root root 16 Jul 23 16:59
> 1532365156819-9774fab8-da28-4188-80ee-a5753668b52a-END.bin
> -rw------- 1 root root 16 Jul 23 16:59
> 1532365156819-9774fab8-da28-4188-80ee-a5753668b52a-START.bin
> -rw------- 1 root root 16 Jul 23 16:49
> 1532364556960-64ee6f39-dc21-4fa2-b472-c00d3d738a87-END.bin
> -rw------- 1 root root 16 Jul 23 16:49
> 1532364556960-64ee6f39-dc21-4fa2-b472-c00d3d738a87-START.bin
> -rw------- 1 root root 16 Jul 23 16:39
> 1532363956827-d9a3189d-586f-437f-9d86-463bdb23ae1f-END.bin
> -rw------- 1 root root 16 Jul 23 16:39
> 1532363956827-d9a3189d-586f-437f-9d86-463bdb23ae1f-START.bin
>
>
>
> --
> Sent from: http://apache-ignite-users.70518.x6.nabble.com/
>

Re: Ignite data can't be recovered after node fail

Posted by smovva <su...@sturfee.com>.
Were you able to resolve this? I'm in a very similar situation.




--
Sent from: http://apache-ignite-users.70518.x6.nabble.com/