You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@ignite.apache.org by "ASF GitHub Bot (JIRA)" <ji...@apache.org> on 2018/08/20 09:46:00 UTC
[jira] [Commented] (IGNITE-9188) Unexpected eviction leading to
data loss in a scenario with stopping/restarting nodes during rebalancing
[ https://issues.apache.org/jira/browse/IGNITE-9188?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16585708#comment-16585708 ]
ASF GitHub Bot commented on IGNITE-9188:
----------------------------------------
GitHub user ascherbakoff opened a pull request:
https://github.com/apache/ignite/pull/4578
IGNITE-9188 Unexpected eviction leading to data loss
You can merge this pull request into a Git repository by running:
$ git pull https://github.com/gridgain/apache-ignite ignite-9188
Alternatively you can review and apply these changes as the patch at:
https://github.com/apache/ignite/pull/4578.patch
To close this pull request, make a commit to your master/trunk branch
with (at least) the following in the commit message:
This closes #4578
----
commit b207bd0b992aa09885242c5da0b7f2c6fdd621f2
Author: ascherbakoff <al...@...>
Date: 2018-08-06T20:46:00Z
IGNITE-9188 Unexpected eviction leading to data loss in a scenario.
commit 88bfeb16422634761eb9f1e412bbc21231387446
Author: Aleksei Scherbakov <al...@...>
Date: 2018-08-08T10:03:43Z
IGNITE-9188 Unexpected eviction leading to data loss.
commit b9a2ecb7ffe80bc3e40d842e211b1037ed0b7af7
Author: Aleksei Scherbakov <al...@...>
Date: 2018-08-14T14:39:35Z
Merge branch 'master' of https://github.com/apache/ignite into ignite-9188
commit cf3a88a9e2268c2f7e35f556d36da0d00fd7cfef
Author: Aleksei Scherbakov <al...@...>
Date: 2018-08-14T17:24:34Z
IGNITE-9188 Unexpected eviction leading to data loss.
commit d721ce138a4920e4d7c7297c98017ffcbecc81eb
Author: Aleksei Scherbakov <al...@...>
Date: 2018-08-15T17:23:43Z
IGNITE-9188 Unexpected eviction leading to data loss.
commit 0e04bedfd9727076bf2fe969c4c3fb8c0cb48757
Author: Aleksei Scherbakov <al...@...>
Date: 2018-08-20T09:10:52Z
IGNITE-9188 wip.
commit 9e1df5550deaf34bdbc8b76b001d16684aa15803
Author: Aleksei Scherbakov <al...@...>
Date: 2018-08-20T09:11:11Z
Merge branch 'master' of https://github.com/apache/ignite into ignite-9188
----
> Unexpected eviction leading to data loss in a scenario with stopping/restarting nodes during rebalancing
> --------------------------------------------------------------------------------------------------------
>
> Key: IGNITE-9188
> URL: https://issues.apache.org/jira/browse/IGNITE-9188
> Project: Ignite
> Issue Type: Bug
> Reporter: Alexei Scherbakov
> Assignee: Alexei Scherbakov
> Priority: Major
> Fix For: 2.7
>
>
> Scenario:
> 1. Split grid nodes in two groups with distinct partition mapping. One group holds even partitions, other - odd. Rebalancing of "odd" partitions is only triggered when number of nodes in grid exceeds n/2 threshold.
> 2. Start n/2 nodes, activate, put data into "even" partitions.
> 3. Start other n/2 nodes, change BLT, delay rebalancing of "odd" partitions.
> 4. Stop newly started nodes before rebalancing is finished.
> Expected behavior: partitions in "odd" group will keep owning state.
> Actual behavior: "odd" partitions are evicted leading to data loss.
> Unit test reproducer:
> {noformat}
> /*
> * Licensed to the Apache Software Foundation (ASF) under one or more
> * contributor license agreements. See the NOTICE file distributed with
> * this work for additional information regarding copyright ownership.
> * The ASF licenses this file to You under the Apache License, Version 2.0
> * (the "License"); you may not use this file except in compliance with
> * the License. You may obtain a copy of the License at
> *
> * http://www.apache.org/licenses/LICENSE-2.0
> *
> * Unless required by applicable law or agreed to in writing, software
> * distributed under the License is distributed on an "AS IS" BASIS,
> * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> * See the License for the specific language governing permissions and
> * limitations under the License.
> */
> package org.apache.ignite.internal.processors.cache.distributed;
> import java.util.ArrayList;
> import java.util.Collection;
> import java.util.HashMap;
> import java.util.List;
> import java.util.Map;
> import java.util.UUID;
> import org.apache.ignite.Ignite;
> import org.apache.ignite.cache.CacheAtomicityMode;
> import org.apache.ignite.cache.CacheMode;
> import org.apache.ignite.cache.affinity.AffinityFunctionContext;
> import org.apache.ignite.cache.affinity.rendezvous.RendezvousAffinityFunction;
> import org.apache.ignite.cluster.ClusterNode;
> import org.apache.ignite.configuration.CacheConfiguration;
> import org.apache.ignite.configuration.DataRegionConfiguration;
> import org.apache.ignite.configuration.DataStorageConfiguration;
> import org.apache.ignite.configuration.IgniteConfiguration;
> import org.apache.ignite.configuration.WALMode;
> import org.apache.ignite.internal.TestRecordingCommunicationSpi;
> import org.apache.ignite.internal.processors.cache.GridCacheUtils;
> import org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtLocalPartition;
> import org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionDemandMessage;
> import org.apache.ignite.internal.util.typedef.G;
> import org.apache.ignite.internal.util.typedef.internal.CU;
> import org.apache.ignite.internal.util.typedef.internal.U;
> import org.apache.ignite.lang.IgniteBiPredicate;
> import org.apache.ignite.plugin.extensions.communication.Message;
> import org.apache.ignite.testframework.junits.common.GridCommonAbstractTest;
> import org.jetbrains.annotations.Nullable;
> import static org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtPartitionState.OWNING;
> /**
>  * Reproducer for IGNITE-9188: partitions must stay in {@code OWNING} state on the original nodes when
>  * the nodes they were about to be rebalanced to are stopped before rebalancing completes.
>  * <p>
>  * Nodes are tagged with a group attribute (first half of the grid vs. second half). While only one group
>  * is present every partition is assigned over all nodes; once both groups are present
>  * {@link TestAffinityFunction} assigns even partitions to one group and odd partitions to the other.
>  */
> public class CacheLostPartitionsRestoreStateTest extends GridCommonAbstractTest {
>     /** Number of bytes in one megabyte. */
>     public static final long MB = 1024 * 1024L;
>
>     /** Name of the node user attribute that carries the node group. */
>     public static final String GRP_ATTR = "grp";
>
>     /** Total number of nodes; the first half belongs to {@link #EVEN_GRP}, the rest to {@link #ODD_GRP}. */
>     public static final int GRIDS_CNT = 6;
>
>     /** Name of the cache the test puts data into. */
>     public static final String CACHE_1 = "filled";
>
>     /** Name of the second cache; the test never puts data into it. */
>     public static final String CACHE_2 = "empty";
>
>     /** Group attribute value of nodes that get even partitions (the literal is only compared for equality). */
>     public static final String EVEN_GRP = "event";
>
>     /** Group attribute value of nodes that get odd partitions. */
>     public static final String ODD_GRP = "odd";
>
>     /** {@inheritDoc} */
>     @Override protected IgniteConfiguration getConfiguration(String igniteInstanceName) throws Exception {
>         IgniteConfiguration cfg = super.getConfiguration(igniteInstanceName);
>
>         // Recording SPI allows the test to hold back rebalance demand messages.
>         cfg.setCommunicationSpi(new TestRecordingCommunicationSpi());
>
>         cfg.setPeerClassLoadingEnabled(true);
>
>         // Tag the node with its group so that TestAffinityFunction can split partitions between groups.
>         Map<String, Object> attrs = new HashMap<>();
>
>         attrs.put(GRP_ATTR, grp(getTestIgniteInstanceIndex(igniteInstanceName)));
>
>         cfg.setUserAttributes(attrs);
>
>         DataStorageConfiguration memCfg = new DataStorageConfiguration()
>             .setDefaultDataRegionConfiguration(
>                 new DataRegionConfiguration().setPersistenceEnabled(true).setInitialSize(50 * MB).setMaxSize(50 * MB))
>             .setWalMode(WALMode.LOG_ONLY);
>
>         cfg.setDataStorageConfiguration(memCfg);
>
>         cfg.setCacheConfiguration(configuration(CACHE_1), configuration(CACHE_2));
>
>         return cfg;
>     }
>
>     /**
>      * Builds the configuration shared by both test caches: partitioned, transactional, 2 backups,
>      * rebalance batch size of 1 and a {@link TestAffinityFunction} with 32 partitions.
>      *
>      * @param name Cache name.
>      * @return Cache configuration.
>      */
>     private CacheConfiguration<Object, Object> configuration(String name) {
>         return new CacheConfiguration<Object, Object>(name).
>             setCacheMode(CacheMode.PARTITIONED).
>             setAtomicityMode(CacheAtomicityMode.TRANSACTIONAL).
>             setBackups(2).
>             setRebalanceBatchSize(1).
>             setAffinity(new TestAffinityFunction().setPartitions(32));
>     }
>
>     /**
>      * @param idx Node index.
>      * @return {@link #EVEN_GRP} for indexes below {@code GRIDS_CNT / 2}, {@link #ODD_GRP} otherwise.
>      */
>     private String grp(int idx) {
>         return idx < GRIDS_CNT / 2 ? EVEN_GRP : ODD_GRP;
>     }
>
>     /**
>      * Starts the first half of the grid, fills one partition, starts the second half with rebalancing
>      * blocked, changes the baseline, stops the second half and checks that the filled partition is still
>      * {@code OWNING} on every remaining node.
>      *
>      * @throws Exception if failed.
>      */
>     public void test() throws Exception {
>         try {
>             Ignite ignite = startGridsMultiThreaded(GRIDS_CNT / 2, false);
>
>             ignite.cluster().active(true);
>
>             awaitPartitionMapExchange();
>
>             // Odd partition id: it moves to the other group once both groups are in topology.
>             int blockPartId = 1;
>
>             // Number of keys put into the partition.
>             int c = 0;
>
>             for (int i = 0; i < 1000; i++) {
>                 if (ignite.affinity(CACHE_1).partition(i) == blockPartId) {
>                     ignite.cache(CACHE_1).put(i, i);
>
>                     c++;
>                 }
>             }
>
>             assertEquals(c, ignite.cache(CACHE_1).size());
>
>             startGridsMultiThreaded(GRIDS_CNT / 2, GRIDS_CNT / 2);
>
>             final int cache1Id = CU.cacheId(CACHE_1);
>             final int cache2Id = CU.cacheId(CACHE_2);
>
>             // Prevent rebalancing to new nodes: hold back demand messages for both test caches.
>             for (Ignite ig0 : G.allGrids()) {
>                 TestRecordingCommunicationSpi.spi(ig0).blockMessages(new IgniteBiPredicate<ClusterNode, Message>() {
>                     @Override public boolean apply(ClusterNode node, Message message) {
>                         if (message instanceof GridDhtPartitionDemandMessage) {
>                             // NOTE(review): presumably `node` is the message destination, i.e. demands are
>                             // expected to target only the initially started nodes - confirm against the SPI.
>                             assertTrue(node.order() <= GRIDS_CNT / 2);
>
>                             GridDhtPartitionDemandMessage msg = (GridDhtPartitionDemandMessage)message;
>
>                             return msg.groupId() == cache1Id || msg.groupId() == cache2Id;
>                         }
>
>                         return false;
>                     }
>                 });
>             }
>
>             ignite.cluster().setBaselineTopology(GRIDS_CNT);
>
>             // Wait until every newly started node has a blocked message.
>             for (Ignite ig0 : G.allGrids()) {
>                 if (ig0.cluster().localNode().order() <= GRIDS_CNT / 2)
>                     continue;
>
>                 TestRecordingCommunicationSpi.spi(ig0).waitForBlocked();
>             }
>
>             assertEquals(c, ignite.cache(CACHE_1).size());
>
>             // Stop the newly started nodes while their rebalancing is still blocked.
>             for (int i = 0; i < GRIDS_CNT / 2; i++)
>                 stopGrid(GRIDS_CNT / 2 + i);
>
>             awaitPartitionMapExchange();
>
>             for (Ignite ig : G.allGrids()) {
>                 GridDhtLocalPartition locPart = dht(ig.cache(CACHE_1)).topology().localPartition(blockPartId);
>
>                 assertNotNull("Partition is missing [node=" + ig.name() + ", part=" + blockPartId + ']', locPart);
>
>                 assertEquals("Unexpected partition state [node=" + ig.name() + ", part=" + blockPartId + ']',
>                     OWNING, locPart.state());
>             }
>         }
>         finally {
>             stopAllGrids();
>         }
>     }
>
>     /** {@inheritDoc} */
>     @Override protected void beforeTest() throws Exception {
>         cleanPersistenceDir();
>     }
>
>     /** {@inheritDoc} */
>     @Override protected void afterTest() throws Exception {
>         cleanPersistenceDir();
>     }
>
>     /**
>      * Rendezvous-based affinity function that, when nodes of exactly two groups (by {@link #GRP_ATTR}) are
>      * present, assigns even partitions to {@link #EVEN_GRP} nodes and odd partitions to {@link #ODD_GRP}
>      * nodes. Otherwise every partition is assigned over all nodes.
>      */
>     public static class TestAffinityFunction extends RendezvousAffinityFunction {
>         /** Default constructor. */
>         public TestAffinityFunction() {
>         }
>
>         /**
>          * @param exclNeighbors Passed through to the parent constructor.
>          */
>         public TestAffinityFunction(boolean exclNeighbors) {
>             super(exclNeighbors);
>         }
>
>         /**
>          * @param exclNeighbors Passed through to the parent constructor.
>          * @param parts Passed through to the parent constructor.
>          */
>         public TestAffinityFunction(boolean exclNeighbors, int parts) {
>             super(exclNeighbors, parts);
>         }
>
>         /**
>          * @param parts Passed through to the parent constructor.
>          * @param backupFilter Passed through to the parent constructor.
>          */
>         public TestAffinityFunction(int parts,
>             @Nullable IgniteBiPredicate<ClusterNode, ClusterNode> backupFilter) {
>             super(parts, backupFilter);
>         }
>
>         /** {@inheritDoc} */
>         @Override public List<List<ClusterNode>> assignPartitions(AffinityFunctionContext affCtx) {
>             int parts = partitions();
>
>             List<ClusterNode> nodes = affCtx.currentTopologySnapshot();
>
>             Map<UUID, Collection<ClusterNode>> neighborhoodCache = isExcludeNeighbors() ?
>                 GridCacheUtils.neighbors(nodes) : null;
>
>             // Bucket topology nodes by the value of their group attribute.
>             Map<Object, List<ClusterNode>> nodesByGrp = U.newHashMap(2);
>
>             for (ClusterNode node : nodes) {
>                 Object grp = node.attribute(GRP_ATTR);
>
>                 List<ClusterNode> grpNodes = nodesByGrp.get(grp);
>
>                 if (grpNodes == null)
>                     nodesByGrp.put(grp, (grpNodes = new ArrayList<>()));
>
>                 grpNodes.add(node);
>             }
>
>             // Partitions are split between groups only when two distinct group values are present.
>             boolean split = nodesByGrp.size() == 2;
>
>             List<List<ClusterNode>> assignments = new ArrayList<>(parts);
>
>             for (int p = 0; p < parts; p++) {
>                 List<ClusterNode> candidates = split ? nodesByGrp.get(p % 2 == 0 ? EVEN_GRP : ODD_GRP) : nodes;
>
>                 assignments.add(assignPartition(p, candidates, affCtx.backups(), neighborhoodCache));
>             }
>
>             return assignments;
>         }
>     }
> }
> {noformat}
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)