You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ignite.apache.org by sb...@apache.org on 2016/08/19 11:49:03 UTC

[46/53] [abbrv] ignite git commit: IGNITE-3678: Test for partition distribution

IGNITE-3678: Test for partition distribution

This submission includes a test that will assert the even and safe
distribution of partitions when server nodes are forcibly killed.

Additionally, the following are included:

1. A new Assertion interface. This looks similar on the surface
   to IgnitePredicate - however there are two subtle differences:
     (a) it returns void, not boolean
     (b) it throws AssertionError
   Since this does not return boolean, the caller does not have
   to check for the result of the assertion; if the assertion fails
   an exception is thrown which contains the details of the failed
   assertion.

2. GridRollingRestartAbstractTest: a new abstract test class that
   is extended for tests that require:
     (a) multiple JVMs
     (b) the first JVM to be a client (i.e. not own any partitions)
     (c) the server JVMs to be killed forcibly (not gracefully)
         while the test executes


Project: http://git-wip-us.apache.org/repos/asf/ignite/repo
Commit: http://git-wip-us.apache.org/repos/asf/ignite/commit/632ff9c0
Tree: http://git-wip-us.apache.org/repos/asf/ignite/tree/632ff9c0
Diff: http://git-wip-us.apache.org/repos/asf/ignite/diff/632ff9c0

Branch: refs/heads/ignite-3299
Commit: 632ff9c0098e1d62c106cc6a2c3b3c49ab1005d7
Parents: fc4c68b
Author: Patrick Peralta <pa...@workday.com>
Authored: Thu Aug 11 14:38:10 2016 -0400
Committer: agura <ag...@gridgain.com>
Committed: Wed Aug 17 18:21:32 2016 +0300

----------------------------------------------------------------------
 .../ignite/internal/util/GridJavaProcess.java   |   5 +-
 .../rebalancing/CacheNodeSafeAssertion.java     | 118 +++++++
 ...cheRebalancingPartitionDistributionTest.java | 149 +++++++++
 .../assertions/AlwaysAssertion.java             |  29 ++
 .../testframework/assertions/Assertion.java     |  31 ++
 .../testframework/assertions/package-info.java  |  22 ++
 .../testframework/junits/GridAbstractTest.java  |  10 +-
 .../common/GridRollingRestartAbstractTest.java  | 324 +++++++++++++++++++
 .../junits/multijvm/IgniteProcessProxy.java     |  24 +-
 .../IgniteCacheFailoverTestSuite.java           |   2 +
 10 files changed, 711 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/ignite/blob/632ff9c0/modules/core/src/main/java/org/apache/ignite/internal/util/GridJavaProcess.java
----------------------------------------------------------------------
diff --git a/modules/core/src/main/java/org/apache/ignite/internal/util/GridJavaProcess.java b/modules/core/src/main/java/org/apache/ignite/internal/util/GridJavaProcess.java
index 8a0b0ae..3f05e13 100644
--- a/modules/core/src/main/java/org/apache/ignite/internal/util/GridJavaProcess.java
+++ b/modules/core/src/main/java/org/apache/ignite/internal/util/GridJavaProcess.java
@@ -191,7 +191,10 @@ public final class GridJavaProcess {
 
         killProc.waitFor();
 
-        assert killProc.exitValue() == 0 : "Process killing was not successful";
+        int exitVal = killProc.exitValue();
+
+        if (exitVal != 0)
+            log.info(String.format("Abnormal exit value of %s for pid %s", exitVal, pid));
 
         if (procKilledC != null)
             procKilledC.apply();

http://git-wip-us.apache.org/repos/asf/ignite/blob/632ff9c0/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/distributed/rebalancing/CacheNodeSafeAssertion.java
----------------------------------------------------------------------
diff --git a/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/distributed/rebalancing/CacheNodeSafeAssertion.java b/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/distributed/rebalancing/CacheNodeSafeAssertion.java
new file mode 100644
index 0000000..bf6b63f
--- /dev/null
+++ b/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/distributed/rebalancing/CacheNodeSafeAssertion.java
@@ -0,0 +1,118 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.internal.processors.cache.distributed.rebalancing;
+
+import java.util.Collection;
+import java.util.Iterator;
+
+import org.apache.ignite.Ignite;
+import org.apache.ignite.cache.affinity.Affinity;
+import org.apache.ignite.cluster.ClusterNode;
+import org.apache.ignite.testframework.assertions.Assertion;
+
+/**
+ * {@link Assertion} verifying that a cache's primary and backup copies are placed so that the loss of a single node
+ * cannot lose data. The check passes when, for every partition, at least one backup lives on a different host than
+ * the primary, or when, for every partition, at least one backup is a different node than the primary. This requires
+ * the cache to be configured with at least one backup.
+ */
+public class CacheNodeSafeAssertion implements Assertion {
+    /** Ignite instance used to query affinity and cluster topology. */
+    private final Ignite ignite;
+
+    /** Name of the cache whose partition placement is checked. */
+    private final String cacheName;
+
+    /**
+     * Create a {@link CacheNodeSafeAssertion} for the cache with the given {@code cacheName}.
+     *
+     * @param ignite The Ignite instance.
+     * @param cacheName The cache name.
+     */
+    public CacheNodeSafeAssertion(Ignite ignite, String cacheName) {
+        this.ignite = ignite;
+        this.cacheName = cacheName;
+    }
+
+    /**
+     * @return Ignite instance.
+     */
+    protected Ignite ignite() {
+        return ignite;
+    }
+
+    /** {@inheritDoc} */
+    @Override public void test() throws AssertionError {
+        Affinity<?> aff = ignite.affinity(cacheName);
+
+        int parts = aff.partitions();
+
+        // Each flag stays true only while every partition examined so far satisfied the corresponding property.
+        boolean allHostSafe = true;
+
+        boolean allNodeSafe = true;
+
+        // Stop scanning as soon as both properties have been violated; the outcome can no longer change.
+        for (int part = 0; part < parts && (allHostSafe || allNodeSafe); part++) {
+            // The iteration order is: primary node first, backup nodes afterwards.
+            Iterator<ClusterNode> owners = aff.mapPartitionToPrimaryAndBackups(part).iterator();
+
+            boolean partHostSafe = false;
+
+            boolean partNodeSafe = false;
+
+            if (owners.hasNext()) {
+                ClusterNode primary = owners.next();
+
+                // Nodes sharing a host with the primary; only needed while host safety is still being tracked.
+                Collection<ClusterNode> sameHost = allHostSafe ? ignite.cluster().forHost(primary).nodes() : null;
+
+                while (owners.hasNext()) {
+                    ClusterNode backup = owners.next();
+
+                    // Host safety: at least one backup must be on a different host than the primary.
+                    if (allHostSafe && !sameHost.contains(backup))
+                        partHostSafe = true;
+
+                    // Node safety: at least one backup must be a different node than the primary.
+                    if (allNodeSafe && !backup.equals(primary))
+                        partNodeSafe = true;
+                }
+            }
+
+            allHostSafe = partHostSafe;
+
+            allNodeSafe = partNodeSafe;
+        }
+
+        if (!allHostSafe && !allNodeSafe)
+            throw new AssertionError("Cache " + cacheName + " is endangered!");
+    }
+}

http://git-wip-us.apache.org/repos/asf/ignite/blob/632ff9c0/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/distributed/rebalancing/GridCacheRebalancingPartitionDistributionTest.java
----------------------------------------------------------------------
diff --git a/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/distributed/rebalancing/GridCacheRebalancingPartitionDistributionTest.java b/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/distributed/rebalancing/GridCacheRebalancingPartitionDistributionTest.java
new file mode 100644
index 0000000..61ee9ea
--- /dev/null
+++ b/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/distributed/rebalancing/GridCacheRebalancingPartitionDistributionTest.java
@@ -0,0 +1,149 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.internal.processors.cache.distributed.rebalancing;
+
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.ignite.Ignite;
+import org.apache.ignite.cache.CacheAtomicWriteOrderMode;
+import org.apache.ignite.cache.CacheAtomicityMode;
+import org.apache.ignite.cache.CacheMode;
+import org.apache.ignite.cache.CacheRebalanceMode;
+import org.apache.ignite.cache.CacheWriteSynchronizationMode;
+import org.apache.ignite.cache.affinity.Affinity;
+import org.apache.ignite.cache.affinity.rendezvous.RendezvousAffinityFunction;
+import org.apache.ignite.cluster.ClusterNode;
+import org.apache.ignite.configuration.CacheConfiguration;
+import org.apache.ignite.lang.IgnitePredicate;
+import org.apache.ignite.testframework.assertions.Assertion;
+import org.apache.ignite.testframework.junits.common.GridRollingRestartAbstractTest;
+
+
+/**
+ * Test the behavior of the partition rebalancing during a rolling restart.
+ */
+public class GridCacheRebalancingPartitionDistributionTest extends GridRollingRestartAbstractTest {
+    /** The maximum allowable deviation from a perfect distribution. */
+    private static final double MAX_DEVIATION = 0.20;
+
+    /** Test cache name. */
+    private static final String CACHE_NAME = "PARTITION_DISTRIBUTION_TEST";
+
+    /** {@inheritDoc} */
+    @Override protected CacheConfiguration<Integer, Integer> getCacheConfiguration() {
+        return new CacheConfiguration<Integer, Integer>(CACHE_NAME)
+                .setAtomicityMode(CacheAtomicityMode.TRANSACTIONAL)
+                .setCacheMode(CacheMode.PARTITIONED)
+                .setBackups(1)
+                .setAffinity(new RendezvousAffinityFunction(true /* machine-safe */, 271))
+                .setAtomicWriteOrderMode(CacheAtomicWriteOrderMode.CLOCK)
+                .setRebalanceMode(CacheRebalanceMode.SYNC)
+                .setWriteSynchronizationMode(CacheWriteSynchronizationMode.FULL_SYNC);
+    }
+
+    /**
+     * The test performs rolling restart and checks no server drops out and the partitions are balanced during
+     * redistribution.
+     */
+    public void testRollingRestart() throws InterruptedException {
+        awaitPartitionMapExchange();
+
+        rollingRestartThread.join();
+
+        assertEquals(getMaxRestarts(), rollingRestartThread.getRestartTotal());
+    }
+
+    /** {@inheritDoc} */
+    @Override public int serverCount() {
+        return 5;
+    }
+
+    /** {@inheritDoc} */
+    @Override public int getMaxRestarts() {
+        return 5;
+    }
+
+    /** {@inheritDoc} */
+    @Override public IgnitePredicate<Ignite> getRestartCheck() {
+        return new IgnitePredicate<Ignite>() {
+            @Override public boolean apply(final Ignite ignite) {
+                Collection<ClusterNode> srvs = ignite.cluster().forServers().nodes();
+
+                if (srvs.size() < serverCount())
+                    return false;
+
+                for (ClusterNode node : srvs) {
+                    int[] primaries = ignite.affinity(CACHE_NAME).primaryPartitions(node);
+
+                    if (primaries == null || primaries.length == 0)
+                        return false;
+                }
+
+                return true;
+            }
+        };
+    }
+
+    /** {@inheritDoc} */
+    @Override public Assertion getRestartAssertion() {
+        return new FairDistributionAssertion();
+    }
+
+    /**
+     * Assertion for {@link RollingRestartThread} to perform prior to each restart to test
+     * the Partition Distribution.
+     */
+    private class FairDistributionAssertion extends CacheNodeSafeAssertion {
+        /** Construct a new FairDistributionAssertion. */
+        public FairDistributionAssertion() {
+            super(grid(0), CACHE_NAME);
+        }
+
+        /** {@inheritDoc} */
+        @Override public void test() throws AssertionError {
+            super.test();
+
+            Affinity<?> affinity = ignite().affinity(CACHE_NAME);
+
+            int partCnt = affinity.partitions();
+
+            Map<ClusterNode, Integer> partMap = new HashMap<>(serverCount());
+
+            for (int i = 0; i < partCnt; i++) {
+                ClusterNode node = affinity.mapPartitionToNode(i);
+
+                int cnt = partMap.containsKey(node) ? partMap.get(node) : 0;
+
+                partMap.put(node, cnt + 1);
+            }
+
+            int fairCnt = partCnt / serverCount();
+
+            for (int count : partMap.values()) {
+                double deviation = Math.abs(fairCnt - count) / (double)fairCnt;
+
+                if (deviation > MAX_DEVIATION) {
+                    throw new AssertionError("partition distribution deviation exceeded max: fair count=" + fairCnt
+                            + ", actual count=" + count + ", deviation=" + deviation);
+                }
+            }
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/ignite/blob/632ff9c0/modules/core/src/test/java/org/apache/ignite/testframework/assertions/AlwaysAssertion.java
----------------------------------------------------------------------
diff --git a/modules/core/src/test/java/org/apache/ignite/testframework/assertions/AlwaysAssertion.java b/modules/core/src/test/java/org/apache/ignite/testframework/assertions/AlwaysAssertion.java
new file mode 100644
index 0000000..f786d4d
--- /dev/null
+++ b/modules/core/src/test/java/org/apache/ignite/testframework/assertions/AlwaysAssertion.java
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.testframework.assertions;
+
+/**
+ * An {@link Assertion} that always passes: its {@link #test()} method does nothing and never throws.
+ */
+public class AlwaysAssertion implements Assertion {
+    /** Shared instance. The class declares no instance fields, so one instance can be reused everywhere. */
+    public static final Assertion INSTANCE = new AlwaysAssertion();
+
+    /** {@inheritDoc} */
+    @Override public void test() throws AssertionError {
+        // No-op.
+    }
+}

http://git-wip-us.apache.org/repos/asf/ignite/blob/632ff9c0/modules/core/src/test/java/org/apache/ignite/testframework/assertions/Assertion.java
----------------------------------------------------------------------
diff --git a/modules/core/src/test/java/org/apache/ignite/testframework/assertions/Assertion.java b/modules/core/src/test/java/org/apache/ignite/testframework/assertions/Assertion.java
new file mode 100644
index 0000000..4799d88
--- /dev/null
+++ b/modules/core/src/test/java/org/apache/ignite/testframework/assertions/Assertion.java
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.testframework.assertions;
+
+/**
+ * An {@link Assertion} is a condition that is expected to be true. If the condition does not hold, an implementation
+ * should throw an {@link AssertionError} (or a specialized subclass) containing information about why the assertion
+ * failed.
+ */
+public interface Assertion {
+    /**
+     * Test that some condition has been satisfied. Returns normally when the condition holds.
+     *
+     * @throws AssertionError if the condition was not satisfied.
+     */
+    public void test() throws AssertionError;
+}

http://git-wip-us.apache.org/repos/asf/ignite/blob/632ff9c0/modules/core/src/test/java/org/apache/ignite/testframework/assertions/package-info.java
----------------------------------------------------------------------
diff --git a/modules/core/src/test/java/org/apache/ignite/testframework/assertions/package-info.java b/modules/core/src/test/java/org/apache/ignite/testframework/assertions/package-info.java
new file mode 100644
index 0000000..a35e01b
--- /dev/null
+++ b/modules/core/src/test/java/org/apache/ignite/testframework/assertions/package-info.java
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * <!-- Package description. -->
+ * Contains interfaces and classes for assertions.
+ */
+package org.apache.ignite.testframework.assertions;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/ignite/blob/632ff9c0/modules/core/src/test/java/org/apache/ignite/testframework/junits/GridAbstractTest.java
----------------------------------------------------------------------
diff --git a/modules/core/src/test/java/org/apache/ignite/testframework/junits/GridAbstractTest.java b/modules/core/src/test/java/org/apache/ignite/testframework/junits/GridAbstractTest.java
index 3910ce4..42a5c7a 100644
--- a/modules/core/src/test/java/org/apache/ignite/testframework/junits/GridAbstractTest.java
+++ b/modules/core/src/test/java/org/apache/ignite/testframework/junits/GridAbstractTest.java
@@ -1525,10 +1525,18 @@ public abstract class GridAbstractTest extends TestCase {
 
     /**
      * @param gridName Grid name.
+     * @return {@code True} if the name of the grid indicates that it was the first started (on this JVM).
+     */
+    protected boolean isFirstGrid(String gridName) {
+        return "0".equals(gridName.substring(getTestGridName().length()));
+    }
+
+    /**
+     * @param gridName Grid name.
      * @return <code>True</code> if test was run in multi-JVM mode and grid with this name was started at another JVM.
      */
     protected boolean isRemoteJvm(String gridName) {
-        return isMultiJvm() && !"0".equals(gridName.substring(getTestGridName().length()));
+        return isMultiJvm() && !isFirstGrid(gridName);
     }
 
     /**

http://git-wip-us.apache.org/repos/asf/ignite/blob/632ff9c0/modules/core/src/test/java/org/apache/ignite/testframework/junits/common/GridRollingRestartAbstractTest.java
----------------------------------------------------------------------
diff --git a/modules/core/src/test/java/org/apache/ignite/testframework/junits/common/GridRollingRestartAbstractTest.java b/modules/core/src/test/java/org/apache/ignite/testframework/junits/common/GridRollingRestartAbstractTest.java
new file mode 100644
index 0000000..6a7973c
--- /dev/null
+++ b/modules/core/src/test/java/org/apache/ignite/testframework/junits/common/GridRollingRestartAbstractTest.java
@@ -0,0 +1,324 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.testframework.junits.common;
+
+import org.apache.ignite.Ignite;
+import org.apache.ignite.configuration.CacheConfiguration;
+import org.apache.ignite.configuration.IgniteConfiguration;
+import org.apache.ignite.lang.IgnitePredicate;
+import org.apache.ignite.spi.discovery.tcp.TcpDiscoverySpi;
+import org.apache.ignite.testframework.assertions.AlwaysAssertion;
+import org.apache.ignite.testframework.assertions.Assertion;
+import org.apache.ignite.testframework.junits.multijvm.IgniteProcessProxy;
+
+import java.io.PrintWriter;
+import java.io.StringWriter;
+
+/**
+ * Base class for tests which use a {@link RollingRestartThread} to stop and start
+ * remote grid JVMs for failover testing.
+ */
+public abstract class GridRollingRestartAbstractTest extends GridCommonAbstractTest {
+    /** Thread that shuts down and restarts Grid nodes for this test. */
+    protected RollingRestartThread rollingRestartThread;
+
+    /** Default predicate used to determine if a Grid node should be restarted. */
+    protected final IgnitePredicate<Ignite> dfltRestartCheck = new IgnitePredicate<Ignite>() {
+        @Override public boolean apply(Ignite ignite) {
+            return serverCount() <= ignite.cluster().forServers().nodes().size();
+        }
+    };
+
+    /**
+     * @return The predicate used to determine if a Grid node should be restarted.
+     */
+    public IgnitePredicate<Ignite> getRestartCheck() {
+        return dfltRestartCheck;
+    }
+
+    /**
+     * Return the {@link Assertion} used to assert some condition before a node is
+     * stopped and started. If the assertion fails, the test will fail with that
+     * assertion.
+     *
+     * @return Assertion that will be tested before a node is restarted.
+     */
+    public Assertion getRestartAssertion() {
+        return AlwaysAssertion.INSTANCE;
+    }
+
+    /**
+     * @return The maximum number of times to perform a restart before exiting (&lt;= 0 implies no limit).
+     */
+    public int getMaxRestarts() {
+        return 3;
+    }
+
+    /**
+     * @return The amount of time in milliseconds to wait between node restarts.
+     */
+    public int getRestartInterval() {
+        return 5000;
+    }
+
+    /**
+     * @return The number of server nodes to start.
+     */
+    public abstract int serverCount();
+
+    /** {@inheritDoc} */
+    @Override protected IgniteConfiguration getConfiguration(String gridName) throws Exception {
+        IgniteConfiguration cfg = super.getConfiguration(gridName);
+
+        if (isFirstGrid(gridName)) {
+            cfg.setClientMode(true);
+
+            assert cfg.getDiscoverySpi() instanceof TcpDiscoverySpi;
+
+            ((TcpDiscoverySpi)cfg.getDiscoverySpi()).setForceServerMode(true);
+        }
+
+        cfg.setCacheConfiguration(getCacheConfiguration());
+
+        return cfg;
+    }
+
+    /**
+     * @return The cache configuration for the test cache.
+     */
+    protected abstract CacheConfiguration<?, ?> getCacheConfiguration();
+
+    /** {@inheritDoc} */
+    @Override protected boolean isMultiJvm() {
+        return true;
+    }
+
+    /** {@inheritDoc} */
+    @Override protected void beforeTestsStarted() throws Exception {
+        super.beforeTestsStarted();
+
+        // the +1 includes this JVM (the client)
+        startGrids(serverCount() + 1);
+
+        rollingRestartThread = new RollingRestartThread();
+
+        rollingRestartThread.start();
+    }
+
+    /** {@inheritDoc} */
+    @Override protected void afterTestsStopped() throws Exception {
+        super.afterTestsStopped();
+
+        stopAllGrids();
+
+        rollingRestartThread.shutdown();
+    }
+
+
+    /**
+     * Thread that performs a "rolling restart" of a set of Ignite grid processes.
+     * */
+    protected class RollingRestartThread extends Thread {
+        /** Running flag. */
+        private volatile boolean isRunning;
+
+        /** The total number of restarts performed by this thread. */
+        private volatile int restartTotal;
+
+        /** Index of Ignite grid that was most recently restarted. */
+        private int currRestartGridId;
+
+        /**
+         * Create a new {@link RollingRestartThread} that will stop and start Ignite Grid
+         * processes managed by the given test. The thread will check the given
+         * {@link #getRestartCheck()} predicate every {@link #getRestartInterval()} milliseconds and
+         * when it returns true, will start and then stop a Java process
+         * via the test class.
+         */
+        public RollingRestartThread() {
+            if (getRestartInterval() < 0)
+                throw new IllegalArgumentException("invalid restart interval: " + getRestartInterval());
+
+            setDaemon(true);
+
+            setName(RollingRestartThread.class.getSimpleName());
+        }
+
+        /**
+         * @return The total number of process restarts performed by this thread.
+         */
+        public int getRestartTotal() {
+            return restartTotal;
+        }
+
+        /**
+         * Stop the rolling restart thread and wait for it to fully exit.
+         *
+         * @throws InterruptedException If the calling thread was interrupted while waiting for
+         * the rolling restart thread to exit.
+         */
+        public synchronized void shutdown() throws InterruptedException {
+            isRunning = false;
+
+            interrupt();
+
+            join();
+        }
+
+        /** {@inheritDoc} */
+        @Override public synchronized void start() {
+            isRunning = true;
+
+            super.start();
+        }
+
+        /** {@inheritDoc} */
+        @Override public void run() {
+            Ignite ignite = grid(0);
+
+            ignite.log().info(getName() + ": started.");
+
+            IgnitePredicate<Ignite> restartCheck = getRestartCheck();
+
+            Assertion restartAssertion = getRestartAssertion();
+
+            while (isRunning) {
+                try {
+                    if (getRestartInterval() > 0)
+                        Thread.sleep(getRestartInterval());
+                    else
+                        Thread.yield();
+
+                    if (restartCheck.apply(ignite)) {
+                        restartAssertion.test();
+
+                        int restartGrid = nextGridToRestart();
+
+                        stopGrid(restartGrid);
+
+                        ignite.log().info(getName() + ": stopped a process.");
+
+                        startGrid(restartGrid);
+
+                        ignite.log().info(getName() + ": started a process.");
+
+                        int restartCnt = ++restartTotal;
+
+                        if (getMaxRestarts() > 0 && restartCnt >= getMaxRestarts())
+                            isRunning = false;
+                    }
+                }
+                catch (RuntimeException e) {
+                    if (isRunning) {
+                        StringWriter sw = new StringWriter();
+
+                        e.printStackTrace(new PrintWriter(sw));
+
+                        ignite.log().info(getName() + ": caught exception: " + sw.toString());
+                    }
+                    else
+                        ignite.log().info(getName() + ": caught exception while exiting: " + e);
+                }
+                catch (InterruptedException e) {
+                    Thread.currentThread().interrupt();
+
+                    if (isRunning) {
+                        StringWriter sw = new StringWriter();
+
+                        e.printStackTrace(new PrintWriter(sw));
+
+                        ignite.log().info(getName() + ": was interrupted: " + sw.toString());
+                    }
+                    else
+                        ignite.log().info(getName() + ": was interrupted while exiting: " + e);
+
+                    isRunning = false;
+                }
+                catch (AssertionError e) {
+                    StringWriter sw = new StringWriter();
+
+                    e.printStackTrace(new PrintWriter(sw));
+
+                    ignite.log().info(getName() + ": assertion failed: " + sw.toString());
+
+                    isRunning = false;
+                }
+            }
+
+            ignite.log().info(getName() + ": exited.");
+        }
+
+        /**
+         * Return the index of the next Grid to restart.
+         *
+         * @return Index of the next grid to start.
+         * @see #currRestartGridId
+         * @see GridRollingRestartAbstractTest#grid(int)
+         */
+        protected int nextGridToRestart() {
+            if (currRestartGridId == serverCount())
+                currRestartGridId = 0;
+
+            // Skip grid 0 because this is the "client" - the JVM that
+            // is executing the test.
+            return ++currRestartGridId;
+        }
+
+        /**
+         * Start the Grid at the given index.
+         *
+         * @param idx Index of Grid to start.
+         * @see GridRollingRestartAbstractTest#grid(int)
+         */
+        protected void startGrid(int idx) {
+            try {
+                GridRollingRestartAbstractTest.this.startGrid(idx);
+            }
+            catch (Exception e) {
+                throw new RuntimeException(e);
+            }
+        }
+
+        /**
+         * Stop the process for the Grid at the given index.
+         *
+         * @param idx Index of Grid to stop.
+         * @see GridRollingRestartAbstractTest#grid(int)
+         */
+        protected void stopGrid(int idx) {
+            Ignite remote = grid(idx);
+
+            assert remote instanceof IgniteProcessProxy : remote;
+
+            IgniteProcessProxy proc = (IgniteProcessProxy) remote;
+
+            int pid = proc.getProcess().getPid();
+
+            try {
+                grid(0).log().info(String.format("Killing grid id %d with PID %d", idx, pid));
+
+                IgniteProcessProxy.kill(proc.name());
+
+                grid(0).log().info(String.format("Grid id %d with PID %d stopped", idx, pid));
+            }
+            catch (Exception e) {
+                throw new RuntimeException(e);
+            }
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/ignite/blob/632ff9c0/modules/core/src/test/java/org/apache/ignite/testframework/junits/multijvm/IgniteProcessProxy.java
----------------------------------------------------------------------
diff --git a/modules/core/src/test/java/org/apache/ignite/testframework/junits/multijvm/IgniteProcessProxy.java b/modules/core/src/test/java/org/apache/ignite/testframework/junits/multijvm/IgniteProcessProxy.java
index 2598bc5..b1a1c62 100644
--- a/modules/core/src/test/java/org/apache/ignite/testframework/junits/multijvm/IgniteProcessProxy.java
+++ b/modules/core/src/test/java/org/apache/ignite/testframework/junits/multijvm/IgniteProcessProxy.java
@@ -213,8 +213,10 @@ public class IgniteProcessProxy implements IgniteEx {
     }
 
     /**
+     * Gracefully shut down the Grid.
+     *
      * @param gridName Grid name.
-     * @param cancel Cacnel flag.
+     * @param cancel If {@code true} then all jobs currently executing will be cancelled.
      */
     public static void stop(String gridName, boolean cancel) {
         IgniteProcessProxy proxy = gridProxies.get(gridName);
@@ -227,6 +229,26 @@ public class IgniteProcessProxy implements IgniteEx {
     }
 
     /**
+     * Forcefully shut down the Grid by killing its process; a no-op if no grid with this name is registered.
+     *
+     * @param gridName Grid name, must not be {@code null}.
+     */
+    public static void kill(String gridName) {
+        A.notNull(gridName, "gridName"); // Validate before the name is used for the lookup.
+        IgniteProcessProxy proxy = gridProxies.get(gridName);
+        if (proxy == null)
+            return; // Nothing registered under this name, so there is no process to kill.
+
+        try {
+            proxy.getProcess().kill();
+        }
+        catch (Exception e) {
+            U.error(proxy.log, "Exception while killing " + gridName, e);
+        }
+        gridProxies.remove(gridName, proxy);
+    }
+
+    /**
      * @param locNodeId ID of local node the requested grid instance is managing.
      * @return An instance of named grid. This method never returns {@code null}.
      * @throws IgniteIllegalStateException Thrown if grid was not properly initialized or grid instance was stopped or

http://git-wip-us.apache.org/repos/asf/ignite/blob/632ff9c0/modules/core/src/test/java/org/apache/ignite/testsuites/IgniteCacheFailoverTestSuite.java
----------------------------------------------------------------------
diff --git a/modules/core/src/test/java/org/apache/ignite/testsuites/IgniteCacheFailoverTestSuite.java b/modules/core/src/test/java/org/apache/ignite/testsuites/IgniteCacheFailoverTestSuite.java
index c9e507d..26cea39 100644
--- a/modules/core/src/test/java/org/apache/ignite/testsuites/IgniteCacheFailoverTestSuite.java
+++ b/modules/core/src/test/java/org/apache/ignite/testsuites/IgniteCacheFailoverTestSuite.java
@@ -41,6 +41,7 @@ import org.apache.ignite.internal.processors.cache.distributed.dht.atomic.GridCa
 import org.apache.ignite.internal.processors.cache.distributed.near.GridCacheAtomicNearRemoveFailureTest;
 import org.apache.ignite.internal.processors.cache.distributed.near.GridCacheAtomicPrimaryWriteOrderNearRemoveFailureTest;
 import org.apache.ignite.internal.processors.cache.distributed.near.GridCacheNearRemoveFailureTest;
+import org.apache.ignite.internal.processors.cache.distributed.rebalancing.GridCacheRebalancingPartitionDistributionTest;
 import org.apache.ignite.testframework.GridTestUtils;
 
 /**
@@ -65,6 +66,7 @@ public class IgniteCacheFailoverTestSuite extends TestSuite {
 
         suite.addTestSuite(GridCacheAtomicInvalidPartitionHandlingSelfTest.class);
         suite.addTestSuite(GridCacheAtomicClientInvalidPartitionHandlingSelfTest.class);
+        suite.addTestSuite(GridCacheRebalancingPartitionDistributionTest.class);
 
         GridTestUtils.addTestIfNeeded(suite, GridCacheIncrementTransformTest.class, ignoredTests);