You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@ignite.apache.org by "Roman Shtykh (JIRA)" <ji...@apache.org> on 2019/03/25 05:19:00 UTC

[jira] [Created] (IGNITE-11620) GridDhtInvalidPartitionException stops the cluster

Roman Shtykh created IGNITE-11620:
-------------------------------------

             Summary: GridDhtInvalidPartitionException stops the cluster
                 Key: IGNITE-11620
                 URL: https://issues.apache.org/jira/browse/IGNITE-11620
             Project: Ignite
          Issue Type: Bug
    Affects Versions: 2.7, 2.6
            Reporter: Roman Shtykh


When data is being inserted and expired while rebalancing is in progress,
*GridDhtInvalidPartitionException* triggers *SYSTEM_WORKER_TERMINATION*.

This can cause cascading failures in the cluster and take the whole cluster down.

Simple test case:
{noformat}
import org.apache.ignite.IgniteCache;
import org.apache.ignite.configuration.CacheConfiguration;
import org.apache.ignite.configuration.IgniteConfiguration;
import org.apache.ignite.failure.StopNodeOrHaltFailureHandler;
import org.apache.ignite.spi.discovery.tcp.TcpDiscoverySpi;
import org.apache.ignite.spi.discovery.tcp.ipfinder.TcpDiscoveryIpFinder;
import org.apache.ignite.spi.discovery.tcp.ipfinder.vm.TcpDiscoveryVmIpFinder;
import org.apache.ignite.testframework.junits.common.GridCommonAbstractTest;

import javax.cache.expiry.CreatedExpiryPolicy;
import javax.cache.expiry.Duration;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;

import static org.apache.ignite.cache.CacheAtomicityMode.ATOMIC;
import static org.apache.ignite.cache.CacheMode.PARTITIONED;

/**
 * Reproducer for IGNITE-11620: a background thread puts entries into a cache whose entries
 * expire one second after creation, while one node of a four-node cluster is stopped and
 * started again. According to the issue report, this combination can raise
 * {@code GridDhtInvalidPartitionException} and trigger {@code SYSTEM_WORKER_TERMINATION}.
 */
public class ExpireWhileRebalanceTest extends GridCommonAbstractTest {
    /** Number of entries the background thread puts into the cache. */
    private static final int ENTRIES = 500000;

    /**
     * IP finder installed on the discovery SPI of every node configuration built by
     * {@link #getConfiguration(String)}.
     */
    protected static final TcpDiscoveryIpFinder IP_FINDER = new TcpDiscoveryVmIpFinder(true);

    /**
     * {@inheritDoc}
     * <p>
     * On top of the parent configuration this sets the discovery IP finder to {@link #IP_FINDER},
     * installs a {@link StopNodeOrHaltFailureHandler}, and registers a single cache named
     * {@code DEFAULT_CACHE_NAME} that is ATOMIC, PARTITIONED and uses a created-expiry policy
     * with a duration of one second.
     */
    @Override
    protected IgniteConfiguration getConfiguration(String gridName) throws Exception {
        IgniteConfiguration cfg = super.getConfiguration(gridName);

        // The cast assumes the parent configuration uses a TcpDiscoverySpi.
        ((TcpDiscoverySpi) cfg.getDiscoverySpi()).setIpFinder(IP_FINDER);

        // NOTE(review): presumably this handler is what turns the reported worker termination
        // into a node stop/halt - confirm against the failure handler documentation.
        cfg.setFailureHandler(new StopNodeOrHaltFailureHandler());

        CacheConfiguration<Object, Object> ccfg = new CacheConfiguration<>(DEFAULT_CACHE_NAME);

        ccfg.setAtomicityMode(ATOMIC);
        ccfg.setCacheMode(PARTITIONED);
        // Entries expire one second after creation, so expiry overlaps with the ongoing puts.
        ccfg.setExpiryPolicyFactory(CreatedExpiryPolicy.factoryOf(new Duration(TimeUnit.SECONDS, 1)));

        cfg.setCacheConfiguration(ccfg);

        return cfg;
    }

    /**
     * Starts four grids, begins putting {@link #ENTRIES} entries from a background thread, and
     * meanwhile stops grid 3, awaits partition map exchange, and starts grid 3 again. Finally
     * waits up to 10 seconds for the background thread to finish.
     * <p>
     * NOTE(review): the result of {@code latch.await(...)} is ignored, so a timeout does not
     * fail this test by itself.
     *
     * @throws Exception If failed.
     */
    public void testExpireWhileRebalancing() throws Exception {
        startGridsMultiThreaded(4);

        IgniteCache<Object, Object> cache = ignite(0).cache(DEFAULT_CACHE_NAME);

        // Counted down once the background thread has finished all puts.
        CountDownLatch latch = new CountDownLatch(1);

        // Load the cache concurrently with the stop/start of grid 3 below.
        new Thread(() -> {
            for (int i = 1; i <= ENTRIES; i++) {
                cache.put(i, i);

                // Report progress after every tenth of the entries.
                if (i % (ENTRIES / 10) == 0)
                    System.out.println(">>> Entries put: " + i);
            }
            latch.countDown();
        }).start();

        // stopping 0 has no effect
        stopGrid(3);

        awaitPartitionMapExchange();

        // Bring grid 3 back while the background thread may still be putting entries.
        startGrid(3);

        latch.await(10, TimeUnit.SECONDS);
    }

    /**
     * {@inheritDoc}
     * <p>
     * Stops all grids after each test.
     */
    @Override
    protected void afterTest() throws Exception {
        stopAllGrids();
    }
}
{noformat}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)