You are viewing a plain text version of this content. The canonical link for it is here.
Posted to notifications@ignite.apache.org by GitBox <gi...@apache.org> on 2022/12/02 16:44:09 UTC

[GitHub] [ignite] Mmuzaf opened a new pull request, #10420: IGNITE-17738 Fix partitions lwm on cluster crush

Mmuzaf opened a new pull request, #10420:
URL: https://github.com/apache/ignite/pull/10420

   Thank you for submitting the pull request to the Apache Ignite.
   
   In order to streamline the review of the contribution 
   we ask you to ensure the following steps have been taken:
   
   ### The Contribution Checklist
   - [ ] There is a single JIRA ticket related to the pull request. 
   - [ ] The web-link to the pull request is attached to the JIRA ticket.
   - [ ] The JIRA ticket has the _Patch Available_ state.
   - [ ] The pull request body describes changes that have been made. 
   The description explains _WHAT_ and _WHY_ was made instead of _HOW_.
   - [ ] The pull request title is treated as the final commit message. 
   The following pattern must be used: `IGNITE-XXXX Change summary` where `XXXX` - number of JIRA issue.
   - [ ] A reviewer has been mentioned through the JIRA comments 
   (see [the Maintainers list](https://cwiki.apache.org/confluence/display/IGNITE/How+to+Contribute#HowtoContribute-ReviewProcessandMaintainers)) 
   - [ ] The pull request has been checked by the Teamcity Bot and 
   the `green visa` attached to the JIRA ticket (see [TC.Bot: Check PR](https://mtcga.gridgain.com/prs.html))
   
   ### Notes
   - [How to Contribute](https://cwiki.apache.org/confluence/display/IGNITE/How+to+Contribute)
   - [Coding abbreviation rules](https://cwiki.apache.org/confluence/display/IGNITE/Abbreviation+Rules)
   - [Coding Guidelines](https://cwiki.apache.org/confluence/display/IGNITE/Coding+Guidelines)
   - [Apache Ignite Teamcity Bot](https://cwiki.apache.org/confluence/display/IGNITE/Apache+Ignite+Teamcity+Bot)
   
   If you need any help, please email dev@ignite.apache.org or ask for advice on the http://asf.slack.com _#ignite_ channel.
   


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@ignite.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [ignite] anton-vinogradov commented on a diff in pull request #10420: IGNITE-17738 Fix partitions lwm on cluster crush

Posted by GitBox <gi...@apache.org>.
anton-vinogradov commented on code in PR #10420:
URL: https://github.com/apache/ignite/pull/10420#discussion_r1039856184


##########
modules/control-utility/src/test/java/org/apache/ignite/util/GridCommandHandlerConsistencyOnClusterCrashTest.java:
##########
@@ -0,0 +1,151 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.util;
+
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicInteger;
+import org.apache.ignite.Ignite;
+import org.apache.ignite.IgniteCache;
+import org.apache.ignite.cache.affinity.rendezvous.RendezvousAffinityFunction;
+import org.apache.ignite.cluster.ClusterNode;
+import org.apache.ignite.cluster.ClusterState;
+import org.apache.ignite.configuration.CacheConfiguration;
+import org.apache.ignite.configuration.IgniteConfiguration;
+import org.apache.ignite.configuration.WALMode;
+import org.apache.ignite.failure.StopNodeFailureHandler;
+import org.apache.ignite.internal.IgniteEx;
+import org.apache.ignite.internal.TestRecordingCommunicationSpi;
+import org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtTxFinishRequest;
+import org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtTxPrepareRequest;
+import org.apache.ignite.internal.util.typedef.G;
+import org.apache.ignite.lang.IgniteBiPredicate;
+import org.apache.ignite.plugin.extensions.communication.Message;
+import org.apache.ignite.testframework.GridTestUtils;
+import org.apache.ignite.testframework.ListeningTestLogger;
+import org.junit.Before;
+import org.junit.Test;
+import static org.apache.ignite.cache.CacheAtomicityMode.TRANSACTIONAL;
+import static org.apache.ignite.cache.CacheWriteSynchronizationMode.FULL_SYNC;
+import static org.apache.ignite.internal.commandline.CommandHandler.EXIT_CODE_OK;
+
+/** */
+public class GridCommandHandlerConsistencyOnClusterCrashTest extends GridCommandHandlerClusterPerMethodAbstractTest {
+    /** Listening logger. */
+    protected final ListeningTestLogger listeningLog = new ListeningTestLogger(log);
+
+    /** Number of cluster nodes. */
+    public int nodes = 3;
+
+    /** Number of backups for the default cache. */
+    public int backupNodes = nodes - 1;
+
+    /** */
+    @Before public void beforeEachTest() throws Exception {
+        cleanPersistenceDir();
+
+        injectTestSystemOut();
+    }
+
+    /** {@inheritDoc} */
+    @Override protected IgniteConfiguration getConfiguration(String igniteInstanceName) throws Exception {
+        IgniteConfiguration cfg = super.getConfiguration(igniteInstanceName);
+
+        cfg.setGridLogger(listeningLog)
+            .setFailureHandler(new StopNodeFailureHandler());
+
+        cfg.getDataStorageConfiguration()
+            .setFileIOFactory(new ThrowableFileIOTestFactory(cfg.getDataStorageConfiguration().getFileIOFactory()))
+            .setWalMode(WALMode.FSYNC); // Allows to use special IO at WAL as well.
+
+        cfg.setCacheConfiguration(new CacheConfiguration<>()
+            .setAffinity(new RendezvousAffinityFunction(false, 1))
+            .setBackups(backupNodes)
+            .setName(DEFAULT_CACHE_NAME)
+            .setAtomicityMode(TRANSACTIONAL)
+            .setWriteSynchronizationMode(FULL_SYNC) // Allows to be sure that all messages are sent when put succeed.
+            .setReadFromBackup(true)); // Allows to check values on backups for idle_verify.
+
+        return cfg;
+    }
+
+    /** */
+    @Test
+    public void testLwmCountersOnClusterCrash() throws Exception {
+        int prepareRqNum = 20;
+        int finishRqNum = 30;
+
+        IgniteEx ignite = startGrids(nodes);
+        ignite.cluster().state(ClusterState.ACTIVE);
+
+        final AtomicInteger updateCnt = new AtomicInteger();
+
+        // Enough to have historical rebalance when needed.
+        for (int i = 0; i < 2_000; i++)
+            ignite.cache(DEFAULT_CACHE_NAME).put(updateCnt.incrementAndGet(), 0);
+
+        stopAllGrids();
+        startGrids(nodes);
+
+        Ignite prim = primaryNode(0L, DEFAULT_CACHE_NAME);
+
+        CountDownLatch prepareLatch = new CountDownLatch(backupNodes * prepareRqNum);
+        CountDownLatch finishLatch = new CountDownLatch(backupNodes * finishRqNum);
+
+        TestRecordingCommunicationSpi.spi(prim).blockMessages(new IgniteBiPredicate<ClusterNode, Message>() {
+            @Override public boolean apply(ClusterNode node, Message msg) {
+                if (msg instanceof GridDhtTxPrepareRequest && prepareLatch.getCount() > 0) {
+                    prepareLatch.countDown();
+
+                    return true;
+                }
+                else if (msg instanceof GridDhtTxFinishRequest && finishLatch.getCount() > 0) {
+                    finishLatch.countDown();
+
+                    return true;
+                }
+                else
+                    return false;
+            }
+        });
+
+        IgniteCache<Integer, Integer> primCache = prim.cache(DEFAULT_CACHE_NAME);
+
+        for (int i = 0; i < prepareRqNum; i++)
+            GridTestUtils.runAsync(() -> primCache.put(updateCnt.incrementAndGet(), 0));
+
+        prepareLatch.await(getTestTimeout(), TimeUnit.MILLISECONDS);
+
+        for (int i = 0; i < finishRqNum; i++)
+            GridTestUtils.runAsync(() -> primCache.put(updateCnt.incrementAndGet(), 0));
+
+        finishLatch.await(getTestTimeout(), TimeUnit.MILLISECONDS);
+
+        G.allGrids().forEach(
+            node -> ((ThrowableFileIOTestFactory)node.configuration().getDataStorageConfiguration().getFileIOFactory())
+                .setThrowEx(true));
+
+        stopAllGrids();

Review Comment:
   Counters should be measured before the cluster restart to make clear which corner case this test covers.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@ignite.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [ignite] anton-vinogradov commented on a diff in pull request #10420: IGNITE-17738 Fix partitions lwm on cluster crush

Posted by GitBox <gi...@apache.org>.
anton-vinogradov commented on code in PR #10420:
URL: https://github.com/apache/ignite/pull/10420#discussion_r1039787958


##########
modules/control-utility/src/test/java/org/apache/ignite/util/GridCommandHandlerConsistencyCountersTest.java:
##########
@@ -214,6 +166,8 @@ public BlockableFileIOFactory(FileIOFactory factory) {
      */
     @Test
     public void testCountersOnCrachRecovery() throws Exception {
+        Assume.assumeFalse(atomicityMode == TRANSACTIONAL);

Review Comment:
   Why not assume that the atomicity mode is ATOMIC instead?  :)



##########
modules/control-utility/src/test/java/org/apache/ignite/util/GridCommandHandlerConsistencyCountersTest.java:
##########
@@ -154,47 +147,6 @@ public static Iterable<Object[]> data() {
         return cfg;
     }
 
-    /**
-     *
-     */
-    private static class BlockableFileIOFactory implements FileIOFactory {
-        /** IO Factory. */
-        private final FileIOFactory factory;
-
-        /**
-         * @param factory Factory.
-         */
-        public BlockableFileIOFactory(FileIOFactory factory) {
-            this.factory = factory;
-        }
-
-        /** {@inheritDoc} */
-        @Override public FileIO create(File file, OpenOption... modes) throws IOException {
-            return new FileIODecorator(factory.create(file, modes)) {
-                @Override public int write(ByteBuffer srcBuf) throws IOException {
-                    if (ioBlocked)

Review Comment:
   The `ioBlocked` field has no usages now.



##########
modules/control-utility/src/test/java/org/apache/ignite/util/GridCommandHandlerConsistencyOnClusterCrashTest.java:
##########
@@ -0,0 +1,151 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.util;
+
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicInteger;
+import org.apache.ignite.Ignite;
+import org.apache.ignite.IgniteCache;
+import org.apache.ignite.cache.affinity.rendezvous.RendezvousAffinityFunction;
+import org.apache.ignite.cluster.ClusterNode;
+import org.apache.ignite.cluster.ClusterState;
+import org.apache.ignite.configuration.CacheConfiguration;
+import org.apache.ignite.configuration.IgniteConfiguration;
+import org.apache.ignite.configuration.WALMode;
+import org.apache.ignite.failure.StopNodeFailureHandler;
+import org.apache.ignite.internal.IgniteEx;
+import org.apache.ignite.internal.TestRecordingCommunicationSpi;
+import org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtTxFinishRequest;
+import org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtTxPrepareRequest;
+import org.apache.ignite.internal.util.typedef.G;
+import org.apache.ignite.lang.IgniteBiPredicate;
+import org.apache.ignite.plugin.extensions.communication.Message;
+import org.apache.ignite.testframework.GridTestUtils;
+import org.apache.ignite.testframework.ListeningTestLogger;
+import org.junit.Before;
+import org.junit.Test;
+import static org.apache.ignite.cache.CacheAtomicityMode.TRANSACTIONAL;
+import static org.apache.ignite.cache.CacheWriteSynchronizationMode.FULL_SYNC;
+import static org.apache.ignite.internal.commandline.CommandHandler.EXIT_CODE_OK;
+
+/** */
+public class GridCommandHandlerConsistencyOnClusterCrashTest extends GridCommandHandlerClusterPerMethodAbstractTest {
+    /** Listening logger. */
+    protected final ListeningTestLogger listeningLog = new ListeningTestLogger(log);
+
+    /** Number of cluster nodes. */
+    public int nodes = 3;
+
+    /** Number of backups for the default cache. */
+    public int backupNodes = nodes - 1;
+
+    /** */
+    @Before public void beforeEachTest() throws Exception {
+        cleanPersistenceDir();
+
+        injectTestSystemOut();
+    }
+
+    /** {@inheritDoc} */
+    @Override protected IgniteConfiguration getConfiguration(String igniteInstanceName) throws Exception {
+        IgniteConfiguration cfg = super.getConfiguration(igniteInstanceName);
+
+        cfg.setGridLogger(listeningLog)
+            .setFailureHandler(new StopNodeFailureHandler());
+
+        cfg.getDataStorageConfiguration()
+            .setFileIOFactory(new ThrowableFileIOTestFactory(cfg.getDataStorageConfiguration().getFileIOFactory()))
+            .setWalMode(WALMode.FSYNC); // Allows to use special IO at WAL as well.
+
+        cfg.setCacheConfiguration(new CacheConfiguration<>()
+            .setAffinity(new RendezvousAffinityFunction(false, 1))
+            .setBackups(backupNodes)
+            .setName(DEFAULT_CACHE_NAME)
+            .setAtomicityMode(TRANSACTIONAL)
+            .setWriteSynchronizationMode(FULL_SYNC) // Allows to be sure that all messages are sent when put succeed.
+            .setReadFromBackup(true)); // Allows to check values on backups for idle_verify.
+
+        return cfg;
+    }
+
+    /** */
+    @Test
+    public void testLwmCountersOnClusterCrash() throws Exception {
+        int prepareRqNum = 20;
+        int finishRqNum = 30;
+
+        IgniteEx ignite = startGrids(nodes);
+        ignite.cluster().state(ClusterState.ACTIVE);
+
+        final AtomicInteger updateCnt = new AtomicInteger();
+
+        // Enough to have historical rebalance when needed.
+        for (int i = 0; i < 2_000; i++)
+            ignite.cache(DEFAULT_CACHE_NAME).put(updateCnt.incrementAndGet(), 0);
+
+        stopAllGrids();
+        startGrids(nodes);
+
+        Ignite prim = primaryNode(0L, DEFAULT_CACHE_NAME);
+
+        CountDownLatch prepareLatch = new CountDownLatch(backupNodes * prepareRqNum);
+        CountDownLatch finishLatch = new CountDownLatch(backupNodes * finishRqNum);
+
+        TestRecordingCommunicationSpi.spi(prim).blockMessages(new IgniteBiPredicate<ClusterNode, Message>() {
+            @Override public boolean apply(ClusterNode node, Message msg) {
+                if (msg instanceof GridDhtTxPrepareRequest && prepareLatch.getCount() > 0) {
+                    prepareLatch.countDown();
+
+                    return true;
+                }
+                else if (msg instanceof GridDhtTxFinishRequest && finishLatch.getCount() > 0) {
+                    finishLatch.countDown();
+
+                    return true;
+                }
+                else
+                    return false;
+            }
+        });
+
+        IgniteCache<Integer, Integer> primCache = prim.cache(DEFAULT_CACHE_NAME);
+
+        for (int i = 0; i < prepareRqNum; i++)
+            GridTestUtils.runAsync(() -> primCache.put(updateCnt.incrementAndGet(), 0));
+
+        prepareLatch.await(getTestTimeout(), TimeUnit.MILLISECONDS);
+
+        for (int i = 0; i < finishRqNum; i++)
+            GridTestUtils.runAsync(() -> primCache.put(updateCnt.incrementAndGet(), 0));
+
+        finishLatch.await(getTestTimeout(), TimeUnit.MILLISECONDS);

Review Comment:
   The result of the wait is never checked, so a timeout here would go unnoticed.



##########
modules/control-utility/src/test/java/org/apache/ignite/util/GridCommandHandlerConsistencyOnClusterCrashTest.java:
##########
@@ -0,0 +1,151 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.util;
+
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicInteger;
+import org.apache.ignite.Ignite;
+import org.apache.ignite.IgniteCache;
+import org.apache.ignite.cache.affinity.rendezvous.RendezvousAffinityFunction;
+import org.apache.ignite.cluster.ClusterNode;
+import org.apache.ignite.cluster.ClusterState;
+import org.apache.ignite.configuration.CacheConfiguration;
+import org.apache.ignite.configuration.IgniteConfiguration;
+import org.apache.ignite.configuration.WALMode;
+import org.apache.ignite.failure.StopNodeFailureHandler;
+import org.apache.ignite.internal.IgniteEx;
+import org.apache.ignite.internal.TestRecordingCommunicationSpi;
+import org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtTxFinishRequest;
+import org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtTxPrepareRequest;
+import org.apache.ignite.internal.util.typedef.G;
+import org.apache.ignite.lang.IgniteBiPredicate;
+import org.apache.ignite.plugin.extensions.communication.Message;
+import org.apache.ignite.testframework.GridTestUtils;
+import org.apache.ignite.testframework.ListeningTestLogger;
+import org.junit.Before;
+import org.junit.Test;
+import static org.apache.ignite.cache.CacheAtomicityMode.TRANSACTIONAL;
+import static org.apache.ignite.cache.CacheWriteSynchronizationMode.FULL_SYNC;
+import static org.apache.ignite.internal.commandline.CommandHandler.EXIT_CODE_OK;
+
+/** */
+public class GridCommandHandlerConsistencyOnClusterCrashTest extends GridCommandHandlerClusterPerMethodAbstractTest {
+    /** Listening logger. */
+    protected final ListeningTestLogger listeningLog = new ListeningTestLogger(log);
+
+    /** Number of cluster nodes. */
+    public int nodes = 3;
+
+    /** Number of backups for the default cache. */
+    public int backupNodes = nodes - 1;
+
+    /** */
+    @Before public void beforeEachTest() throws Exception {
+        cleanPersistenceDir();
+
+        injectTestSystemOut();
+    }
+
+    /** {@inheritDoc} */
+    @Override protected IgniteConfiguration getConfiguration(String igniteInstanceName) throws Exception {
+        IgniteConfiguration cfg = super.getConfiguration(igniteInstanceName);
+
+        cfg.setGridLogger(listeningLog)
+            .setFailureHandler(new StopNodeFailureHandler());
+
+        cfg.getDataStorageConfiguration()
+            .setFileIOFactory(new ThrowableFileIOTestFactory(cfg.getDataStorageConfiguration().getFileIOFactory()))
+            .setWalMode(WALMode.FSYNC); // Allows to use special IO at WAL as well.
+
+        cfg.setCacheConfiguration(new CacheConfiguration<>()
+            .setAffinity(new RendezvousAffinityFunction(false, 1))
+            .setBackups(backupNodes)
+            .setName(DEFAULT_CACHE_NAME)
+            .setAtomicityMode(TRANSACTIONAL)
+            .setWriteSynchronizationMode(FULL_SYNC) // Allows to be sure that all messages are sent when put succeed.
+            .setReadFromBackup(true)); // Allows to check values on backups for idle_verify.
+
+        return cfg;
+    }
+
+    /** */
+    @Test
+    public void testLwmCountersOnClusterCrash() throws Exception {
+        int prepareRqNum = 20;
+        int finishRqNum = 30;

Review Comment:
   Why not just `rn`? :)
   Let it be shortened to `Req`, at least %)



##########
modules/control-utility/src/test/java/org/apache/ignite/util/GridCommandHandlerConsistencyOnClusterCrashTest.java:
##########
@@ -0,0 +1,151 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.util;
+
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicInteger;
+import org.apache.ignite.Ignite;
+import org.apache.ignite.IgniteCache;
+import org.apache.ignite.cache.affinity.rendezvous.RendezvousAffinityFunction;
+import org.apache.ignite.cluster.ClusterNode;
+import org.apache.ignite.cluster.ClusterState;
+import org.apache.ignite.configuration.CacheConfiguration;
+import org.apache.ignite.configuration.IgniteConfiguration;
+import org.apache.ignite.configuration.WALMode;
+import org.apache.ignite.failure.StopNodeFailureHandler;
+import org.apache.ignite.internal.IgniteEx;
+import org.apache.ignite.internal.TestRecordingCommunicationSpi;
+import org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtTxFinishRequest;
+import org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtTxPrepareRequest;
+import org.apache.ignite.internal.util.typedef.G;
+import org.apache.ignite.lang.IgniteBiPredicate;
+import org.apache.ignite.plugin.extensions.communication.Message;
+import org.apache.ignite.testframework.GridTestUtils;
+import org.apache.ignite.testframework.ListeningTestLogger;
+import org.junit.Before;
+import org.junit.Test;
+import static org.apache.ignite.cache.CacheAtomicityMode.TRANSACTIONAL;
+import static org.apache.ignite.cache.CacheWriteSynchronizationMode.FULL_SYNC;
+import static org.apache.ignite.internal.commandline.CommandHandler.EXIT_CODE_OK;
+
+/** */
+public class GridCommandHandlerConsistencyOnClusterCrashTest extends GridCommandHandlerClusterPerMethodAbstractTest {
+    /** Listening logger. */
+    protected final ListeningTestLogger listeningLog = new ListeningTestLogger(log);
+
+    /** Number of cluster nodes. */
+    public int nodes = 3;
+
+    /** Number of backups for the default cache. */
+    public int backupNodes = nodes - 1;
+
+    /** */
+    @Before public void beforeEachTest() throws Exception {
+        cleanPersistenceDir();
+
+        injectTestSystemOut();
+    }
+
+    /** {@inheritDoc} */
+    @Override protected IgniteConfiguration getConfiguration(String igniteInstanceName) throws Exception {
+        IgniteConfiguration cfg = super.getConfiguration(igniteInstanceName);
+
+        cfg.setGridLogger(listeningLog)
+            .setFailureHandler(new StopNodeFailureHandler());
+
+        cfg.getDataStorageConfiguration()
+            .setFileIOFactory(new ThrowableFileIOTestFactory(cfg.getDataStorageConfiguration().getFileIOFactory()))
+            .setWalMode(WALMode.FSYNC); // Allows to use special IO at WAL as well.
+
+        cfg.setCacheConfiguration(new CacheConfiguration<>()
+            .setAffinity(new RendezvousAffinityFunction(false, 1))
+            .setBackups(backupNodes)
+            .setName(DEFAULT_CACHE_NAME)
+            .setAtomicityMode(TRANSACTIONAL)
+            .setWriteSynchronizationMode(FULL_SYNC) // Allows to be sure that all messages are sent when put succeed.
+            .setReadFromBackup(true)); // Allows to check values on backups for idle_verify.
+
+        return cfg;
+    }
+
+    /** */
+    @Test
+    public void testLwmCountersOnClusterCrash() throws Exception {
+        int prepareRqNum = 20;
+        int finishRqNum = 30;
+
+        IgniteEx ignite = startGrids(nodes);
+        ignite.cluster().state(ClusterState.ACTIVE);
+
+        final AtomicInteger updateCnt = new AtomicInteger();
+
+        // Enough to have historical rebalance when needed.
+        for (int i = 0; i < 2_000; i++)
+            ignite.cache(DEFAULT_CACHE_NAME).put(updateCnt.incrementAndGet(), 0);
+
+        stopAllGrids();
+        startGrids(nodes);
+
+        Ignite prim = primaryNode(0L, DEFAULT_CACHE_NAME);
+
+        CountDownLatch prepareLatch = new CountDownLatch(backupNodes * prepareRqNum);
+        CountDownLatch finishLatch = new CountDownLatch(backupNodes * finishRqNum);
+
+        TestRecordingCommunicationSpi.spi(prim).blockMessages(new IgniteBiPredicate<ClusterNode, Message>() {
+            @Override public boolean apply(ClusterNode node, Message msg) {
+                if (msg instanceof GridDhtTxPrepareRequest && prepareLatch.getCount() > 0) {
+                    prepareLatch.countDown();
+
+                    return true;
+                }
+                else if (msg instanceof GridDhtTxFinishRequest && finishLatch.getCount() > 0) {
+                    finishLatch.countDown();
+
+                    return true;
+                }
+                else
+                    return false;
+            }
+        });
+
+        IgniteCache<Integer, Integer> primCache = prim.cache(DEFAULT_CACHE_NAME);
+
+        for (int i = 0; i < prepareRqNum; i++)
+            GridTestUtils.runAsync(() -> primCache.put(updateCnt.incrementAndGet(), 0));
+
+        prepareLatch.await(getTestTimeout(), TimeUnit.MILLISECONDS);

Review Comment:
   The result of the wait is never checked, so a timeout here would go unnoticed.



##########
modules/control-utility/src/test/java/org/apache/ignite/util/GridCommandHandlerConsistencyOnClusterCrashTest.java:
##########
@@ -0,0 +1,151 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.util;
+
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicInteger;
+import org.apache.ignite.Ignite;
+import org.apache.ignite.IgniteCache;
+import org.apache.ignite.cache.affinity.rendezvous.RendezvousAffinityFunction;
+import org.apache.ignite.cluster.ClusterNode;
+import org.apache.ignite.cluster.ClusterState;
+import org.apache.ignite.configuration.CacheConfiguration;
+import org.apache.ignite.configuration.IgniteConfiguration;
+import org.apache.ignite.configuration.WALMode;
+import org.apache.ignite.failure.StopNodeFailureHandler;
+import org.apache.ignite.internal.IgniteEx;
+import org.apache.ignite.internal.TestRecordingCommunicationSpi;
+import org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtTxFinishRequest;
+import org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtTxPrepareRequest;
+import org.apache.ignite.internal.util.typedef.G;
+import org.apache.ignite.lang.IgniteBiPredicate;
+import org.apache.ignite.plugin.extensions.communication.Message;
+import org.apache.ignite.testframework.GridTestUtils;
+import org.apache.ignite.testframework.ListeningTestLogger;
+import org.junit.Before;
+import org.junit.Test;
+import static org.apache.ignite.cache.CacheAtomicityMode.TRANSACTIONAL;
+import static org.apache.ignite.cache.CacheWriteSynchronizationMode.FULL_SYNC;
+import static org.apache.ignite.internal.commandline.CommandHandler.EXIT_CODE_OK;
+
+/** */
+public class GridCommandHandlerConsistencyOnClusterCrashTest extends GridCommandHandlerClusterPerMethodAbstractTest {
+    /** Listening logger. */
+    protected final ListeningTestLogger listeningLog = new ListeningTestLogger(log);
+
+    /** Number of cluster nodes. */
+    public int nodes = 3;
+
+    /** Number of backups for the default cache. */
+    public int backupNodes = nodes - 1;
+
+    /** */
+    @Before public void beforeEachTest() throws Exception {
+        cleanPersistenceDir();
+
+        injectTestSystemOut();
+    }
+
+    /** {@inheritDoc} */
+    @Override protected IgniteConfiguration getConfiguration(String igniteInstanceName) throws Exception {
+        IgniteConfiguration cfg = super.getConfiguration(igniteInstanceName);
+
+        cfg.setGridLogger(listeningLog)
+            .setFailureHandler(new StopNodeFailureHandler());
+
+        cfg.getDataStorageConfiguration()
+            .setFileIOFactory(new ThrowableFileIOTestFactory(cfg.getDataStorageConfiguration().getFileIOFactory()))
+            .setWalMode(WALMode.FSYNC); // Allows to use special IO at WAL as well.
+
+        cfg.setCacheConfiguration(new CacheConfiguration<>()
+            .setAffinity(new RendezvousAffinityFunction(false, 1))
+            .setBackups(backupNodes)
+            .setName(DEFAULT_CACHE_NAME)
+            .setAtomicityMode(TRANSACTIONAL)
+            .setWriteSynchronizationMode(FULL_SYNC) // Allows to be sure that all messages are sent when put succeed.
+            .setReadFromBackup(true)); // Allows to check values on backups for idle_verify.
+
+        return cfg;
+    }
+
+    /** */
+    @Test
+    public void testLwmCountersOnClusterCrash() throws Exception {
+        int prepareRqNum = 20;
+        int finishRqNum = 30;
+
+        IgniteEx ignite = startGrids(nodes);
+        ignite.cluster().state(ClusterState.ACTIVE);
+
+        final AtomicInteger updateCnt = new AtomicInteger();
+
+        // Enough to have historical rebalance when needed.
+        for (int i = 0; i < 2_000; i++)
+            ignite.cache(DEFAULT_CACHE_NAME).put(updateCnt.incrementAndGet(), 0);
+
+        stopAllGrids();
+        startGrids(nodes);
+
+        Ignite prim = primaryNode(0L, DEFAULT_CACHE_NAME);
+
+        CountDownLatch prepareLatch = new CountDownLatch(backupNodes * prepareRqNum);
+        CountDownLatch finishLatch = new CountDownLatch(backupNodes * finishRqNum);
+
+        TestRecordingCommunicationSpi.spi(prim).blockMessages(new IgniteBiPredicate<ClusterNode, Message>() {
+            @Override public boolean apply(ClusterNode node, Message msg) {
+                if (msg instanceof GridDhtTxPrepareRequest && prepareLatch.getCount() > 0) {
+                    prepareLatch.countDown();
+
+                    return true;
+                }
+                else if (msg instanceof GridDhtTxFinishRequest && finishLatch.getCount() > 0) {
+                    finishLatch.countDown();
+
+                    return true;
+                }
+                else
+                    return false;
+            }
+        });
+
+        IgniteCache<Integer, Integer> primCache = prim.cache(DEFAULT_CACHE_NAME);
+
+        for (int i = 0; i < prepareRqNum; i++)
+            GridTestUtils.runAsync(() -> primCache.put(updateCnt.incrementAndGet(), 0));
+
+        prepareLatch.await(getTestTimeout(), TimeUnit.MILLISECONDS);
+
+        for (int i = 0; i < finishRqNum; i++)
+            GridTestUtils.runAsync(() -> primCache.put(updateCnt.incrementAndGet(), 0));
+
+        finishLatch.await(getTestTimeout(), TimeUnit.MILLISECONDS);
+
+        G.allGrids().forEach(
+            node -> ((ThrowableFileIOTestFactory)node.configuration().getDataStorageConfiguration().getFileIOFactory())
+                .setThrowEx(true));
+
+        stopAllGrids();
+        startGrids(nodes);
+
+        awaitPartitionMapExchange();
+
+        assertEquals(EXIT_CODE_OK, execute("--cache", "idle_verify"));
+        assertConflicts(false, false);

Review Comment:
   This check will also pass when all the data is missing :)
   It will also pass after a full rebalance (the current case) :(
   
   Please check the result properly.



##########
modules/control-utility/src/test/java/org/apache/ignite/util/GridCommandHandlerConsistencyOnClusterCrashTest.java:
##########
@@ -0,0 +1,151 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.util;
+
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicInteger;
+import org.apache.ignite.Ignite;
+import org.apache.ignite.IgniteCache;
+import org.apache.ignite.cache.affinity.rendezvous.RendezvousAffinityFunction;
+import org.apache.ignite.cluster.ClusterNode;
+import org.apache.ignite.cluster.ClusterState;
+import org.apache.ignite.configuration.CacheConfiguration;
+import org.apache.ignite.configuration.IgniteConfiguration;
+import org.apache.ignite.configuration.WALMode;
+import org.apache.ignite.failure.StopNodeFailureHandler;
+import org.apache.ignite.internal.IgniteEx;
+import org.apache.ignite.internal.TestRecordingCommunicationSpi;
+import org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtTxFinishRequest;
+import org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtTxPrepareRequest;
+import org.apache.ignite.internal.util.typedef.G;
+import org.apache.ignite.lang.IgniteBiPredicate;
+import org.apache.ignite.plugin.extensions.communication.Message;
+import org.apache.ignite.testframework.GridTestUtils;
+import org.apache.ignite.testframework.ListeningTestLogger;
+import org.junit.Before;
+import org.junit.Test;
+import static org.apache.ignite.cache.CacheAtomicityMode.TRANSACTIONAL;
+import static org.apache.ignite.cache.CacheWriteSynchronizationMode.FULL_SYNC;
+import static org.apache.ignite.internal.commandline.CommandHandler.EXIT_CODE_OK;
+
+/** */
+public class GridCommandHandlerConsistencyOnClusterCrashTest extends GridCommandHandlerClusterPerMethodAbstractTest {
+    /** Listening logger. */
+    protected final ListeningTestLogger listeningLog = new ListeningTestLogger(log);
+
+    /** Number of cluster nodes. */
+    public int nodes = 3;
+
+    /** Number of backups for the default cache. */
+    public int backupNodes = nodes - 1;
+
+    /** */
+    @Before public void beforeEachTest() throws Exception {

Review Comment:
   Annotation `@Before` must be on a separate line (hint from IDEA)



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@ignite.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org