You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ignite.apache.org by ag...@apache.org on 2020/01/23 10:55:29 UTC
[ignite] branch master updated: IGNITE-12439 Warning about possible
OOME if cache with too many partitions is created in a limited-size data
region
This is an automated email from the ASF dual-hosted git repository.
agura pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ignite.git
The following commit(s) were added to refs/heads/master by this push:
new 3639f92 IGNITE-12439 Warning about possible OOME if cache with too many partitions is created in a limited-size data region
3639f92 is described below
commit 3639f92c7d3a50197ce90805215d63a52a9cda4f
Author: Sergey Chugunov <sc...@gridgain.com>
AuthorDate: Thu Jan 23 13:50:21 2020 +0300
IGNITE-12439 Warning about possible OOME if cache with too many partitions is created in a limited-size data region
Signed-off-by: Andrey Gura <ag...@apache.org>
---
.../affinity/IdealAffinityAssignment.java | 2 +-
.../cache/CacheAffinitySharedManager.java | 116 ++++++++
.../processors/cache/CacheGroupMetricsImpl.java | 19 ++
.../GridCacheDatabaseSharedManager.java | 2 +
.../cache/persistence/GridCacheOffheapManager.java | 8 +-
.../cache/persistence/pagemem/PageMemoryEx.java | 13 +
.../cache/persistence/pagemem/PageMemoryImpl.java | 20 ++
.../apache/ignite/internal/util/IgniteUtils.java | 14 +-
.../cache/CacheDataRegionConfigurationTest.java | 305 +++++++++++++++++++++
.../processors/cache/CacheGroupMetricsTest.java | 31 +++
.../ignite/testframework/ListeningTestLogger.java | 13 +
11 files changed, 540 insertions(+), 3 deletions(-)
diff --git a/modules/core/src/main/java/org/apache/ignite/internal/processors/affinity/IdealAffinityAssignment.java b/modules/core/src/main/java/org/apache/ignite/internal/processors/affinity/IdealAffinityAssignment.java
index c930ec5..58fb341 100644
--- a/modules/core/src/main/java/org/apache/ignite/internal/processors/affinity/IdealAffinityAssignment.java
+++ b/modules/core/src/main/java/org/apache/ignite/internal/processors/affinity/IdealAffinityAssignment.java
@@ -90,7 +90,7 @@ public class IdealAffinityAssignment {
* @param nodes Nodes.
* @param assignment Assignment.
*/
- private static Map<Object, Set<Integer>> calculatePrimaries(
+ public static Map<Object, Set<Integer>> calculatePrimaries(
@Nullable List<ClusterNode> nodes,
List<List<ClusterNode>> assignment
) {
diff --git a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/CacheAffinitySharedManager.java b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/CacheAffinitySharedManager.java
index 2d8ce7c..46ca65f 100644
--- a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/CacheAffinitySharedManager.java
+++ b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/CacheAffinitySharedManager.java
@@ -18,6 +18,7 @@
package org.apache.ignite.internal.processors.cache;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
@@ -26,6 +27,7 @@ import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
+import java.util.Optional;
import java.util.Set;
import java.util.UUID;
import java.util.concurrent.ConcurrentHashMap;
@@ -38,6 +40,8 @@ import org.apache.ignite.IgniteSystemProperties;
import org.apache.ignite.cache.affinity.AffinityFunction;
import org.apache.ignite.cluster.ClusterNode;
import org.apache.ignite.configuration.CacheConfiguration;
+import org.apache.ignite.configuration.DataRegionConfiguration;
+import org.apache.ignite.configuration.DataStorageConfiguration;
import org.apache.ignite.configuration.NearCacheConfiguration;
import org.apache.ignite.events.DiscoveryEvent;
import org.apache.ignite.events.Event;
@@ -110,6 +114,9 @@ public class CacheAffinitySharedManager<K, V> extends GridCacheSharedManagerAdap
/** Affinity information for all started caches (initialized on coordinator). */
private ConcurrentMap<Integer, CacheGroupHolder> grpHolders = new ConcurrentHashMap<>();
+ /** */
+ private CacheMemoryOverheadValidator validator = new CacheMemoryOverheadValidator();
+
/** Topology version which requires affinity re-calculation (set from discovery thread). */
private AffinityTopologyVersion lastAffVer;
@@ -1023,6 +1030,8 @@ public class CacheAffinitySharedManager<K, V> extends GridCacheSharedManagerAdap
fut.timeBag().finishLocalStage("Affinity initialization on cache group start " +
"[grp=" + grpDesc.cacheOrGroupName() + "]");
+ validator.validateCacheGroup(grpDesc);
+
return null;
}
);
@@ -1903,6 +1912,8 @@ public class CacheAffinitySharedManager<K, V> extends GridCacheSharedManagerAdap
fut.timeBag().finishLocalStage("Affinity centralized initialization (crd) " +
"[grp=" + desc.cacheOrGroupName() + ", crd=" + crd + "]");
+
+ validator.validateCacheGroup(desc);
}
});
@@ -2868,4 +2879,109 @@ public class CacheAffinitySharedManager<K, V> extends GridCacheSharedManagerAdap
return "WaitRebalanceInfo [topVer=" + topVer + ", grps=" + waitGrps + ']';
}
}
+
+    /**
+     * Validator for memory overhead of persistent caches.
+     *
+     * A persistent cache requires some overhead in data region memory, e.g. a metapage per partition created by
+     * the cache. If this overhead exceeds {@link #MEMORY_OVERHEAD_THRESHOLD} of the data region maximum size
+     * (15% for now) it may cause critical errors on the node during checkpoint.
+     *
+     * The validator analyzes cache group configuration and only prints a warning to the log to inform the user
+     * about the found problem.
+     */
+    class CacheMemoryOverheadValidator {
+        /** Share of data region maximum size that partition metapages may occupy before a warning is printed. */
+        private static final double MEMORY_OVERHEAD_THRESHOLD = 0.15;
+
+        /**
+         * Validates cache group configuration and prints warning if it violates the memory overhead limit.
+         * Nothing is done for non-persistent and system caches, for groups without an affinity holder on this node
+         * and for groups that have no partitions assigned to the local node.
+         *
+         * @param grpDesc Descriptor of cache group to validate.
+         */
+        void validateCacheGroup(CacheGroupDescriptor grpDesc) {
+            DataStorageConfiguration dsCfg = cctx.gridConfig().getDataStorageConfiguration();
+            CacheConfiguration<?, ?> grpCfg = grpDesc.config();
+
+            if (!CU.isPersistentCache(grpCfg, dsCfg) || CU.isSystemCache(grpDesc.cacheOrGroupName()))
+                return;
+
+            CacheGroupHolder grpHolder = grpHolders.get(grpDesc.groupId());
+
+            if (grpHolder == null)
+                return;
+
+            UUID locNodeId = cctx.localNodeId();
+
+            int partsNum = 0;
+
+            // A partition is counted if the local node appears anywhere in its ideal assignment list.
+            for (List<ClusterNode> nodes : grpHolder.aff.idealAssignment().assignment()) {
+                if (nodes.stream().anyMatch(n -> n.id().equals(locNodeId)))
+                    partsNum++;
+            }
+
+            if (partsNum == 0)
+                return;
+
+            DataRegionConfiguration drCfg = findDataRegion(dsCfg, grpCfg.getDataRegionName());
+
+            if (drCfg == null)
+                return;
+
+            int pageSize = dsCfg.getPageSize();
+
+            // Widen before multiplying so the product is never computed in int arithmetic.
+            long metaBytes = (long)partsNum * pageSize;
+
+            if ((double)metaBytes / drCfg.getMaxSize() > MEMORY_OVERHEAD_THRESHOLD)
+                log.warning(buildWarningMessage(grpDesc, drCfg, pageSize, partsNum));
+        }
+
+        /**
+         * Builds explanatory warning message.
+         *
+         * @param grpDesc Descriptor of cache group violating memory overhead threshold.
+         * @param drCfg Configuration of the data region with insufficient memory.
+         * @param pageSize Page size in bytes; one page per partition is accounted as overhead.
+         * @param partsNum Number of partitions of the group assigned to the local node.
+         * @return Warning message.
+         */
+        private String buildWarningMessage(CacheGroupDescriptor grpDesc,
+            DataRegionConfiguration drCfg,
+            int pageSize,
+            int partsNum
+        ) {
+            String res = "Cache group '%s'" +
+                " brings high overhead for its metainformation in data region '%s'." +
+                " Metainformation required for its partitions (%d partitions, %d bytes per partition, %d MBs total)" +
+                " will consume more than %d%% of data region memory (%d MBs)." +
+                " It may lead to critical errors on the node and cluster instability." +
+                " Please reduce number of partitions, add more memory to the data region" +
+                " or add more server nodes for this cache group.";
+
+            return String.format(
+                res,
+                grpDesc.cacheOrGroupName(),
+                drCfg.getName(),
+                partsNum,
+                pageSize,
+                // Long arithmetic: the product must not be truncated before conversion to megabytes.
+                U.sizeInMegabytes((long)partsNum * pageSize),
+                // Percentage is derived from the constant so the text cannot drift from the actual check.
+                Math.round(MEMORY_OVERHEAD_THRESHOLD * 100),
+                U.sizeInMegabytes(drCfg.getMaxSize())
+            );
+        }
+
+        /**
+         * Finds data region by name.
+         *
+         * @param dsCfg Data storage configuration.
+         * @param drName Data region name, {@code null} stands for the default data region.
+         *
+         * @return Default data region if no name is given, no custom regions are configured or the name matches
+         *      the default region; otherwise the custom region with the given name or {@code null} if there is none.
+         */
+        @Nullable private DataRegionConfiguration findDataRegion(DataStorageConfiguration dsCfg, String drName) {
+            DataRegionConfiguration dfltCfg = dsCfg.getDefaultDataRegionConfiguration();
+
+            if (dsCfg.getDataRegionConfigurations() == null || drName == null)
+                return dfltCfg;
+
+            // drName is known to be non-null here, so it is used as the receiver of equals().
+            if (drName.equals(dfltCfg.getName()))
+                return dfltCfg;
+
+            Optional<DataRegionConfiguration> cfgOpt = Arrays.stream(dsCfg.getDataRegionConfigurations())
+                .filter(drCfg -> drName.equals(drCfg.getName()))
+                .findFirst();
+
+            return cfgOpt.orElse(null);
+        }
+    }
}
diff --git a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/CacheGroupMetricsImpl.java b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/CacheGroupMetricsImpl.java
index b1394bc..51c00e6 100644
--- a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/CacheGroupMetricsImpl.java
+++ b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/CacheGroupMetricsImpl.java
@@ -69,6 +69,9 @@ public class CacheGroupMetricsImpl {
/** */
private final LongMetric sparseStorageSize;
+ /** Number of local partitions initialized on current node. */
+ private final AtomicLongMetric initLocalPartitionsNumber;
+
/** Interface describing a predicate of two integers. */
private interface IntBiPredicate {
/**
@@ -105,6 +108,8 @@ public class CacheGroupMetricsImpl {
idxBuildCntPartitionsLeft = mreg.longMetric("IndexBuildCountPartitionsLeft",
"Number of partitions need processed for finished indexes create or rebuilding.");
+ initLocalPartitionsNumber = mreg.longMetric("InitializedLocalPartitionsNumber", "Number of local partitions initialized on current node.");
+
DataRegion region = ctx.dataRegion();
// On client node, region is null.
@@ -188,6 +193,20 @@ public class CacheGroupMetricsImpl {
idxBuildCntPartitionsLeft.decrement();
}
+ /**
+ * Increments the {@code InitializedLocalPartitionsNumber} metric, i.e. the number of local partitions
+ * initialized on current node.
+ */
+ public void incrementInitializedLocalPartitions() {
+ initLocalPartitionsNumber.increment();
+ }
+
+ /**
+ * Decrements the {@code InitializedLocalPartitionsNumber} metric, i.e. the number of local partitions
+ * initialized on current node.
+ */
+ public void decrementInitializedLocalPartitions() {
+ initLocalPartitionsNumber.decrement();
+ }
+
/** */
public int getGroupId() {
return ctx.groupId();
diff --git a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/GridCacheDatabaseSharedManager.java b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/GridCacheDatabaseSharedManager.java
index 4b6d317..8000f5f 100755
--- a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/GridCacheDatabaseSharedManager.java
+++ b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/GridCacheDatabaseSharedManager.java
@@ -3957,6 +3957,8 @@ public class GridCacheDatabaseSharedManager extends IgniteCacheDatabaseSharedMan
req.onDone(null);
+ grp.metrics().decrementInitializedLocalPartitions();
+
if (log.isDebugEnabled())
log.debug("Partition file has destroyed [grpId=" + grpId + ", partId=" + partId + "]");
}
diff --git a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/GridCacheOffheapManager.java b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/GridCacheOffheapManager.java
index af7998e..3867a63 100644
--- a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/GridCacheOffheapManager.java
+++ b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/GridCacheOffheapManager.java
@@ -1891,7 +1891,13 @@ public class GridCacheOffheapManager extends IgniteCacheOffheapManagerImpl imple
int grpId = grp.groupId();
long partMetaId = pageMem.partitionMetaPageId(grpId, partId);
- long partMetaPage = pageMem.acquirePage(grpId, partMetaId);
+ AtomicBoolean metaPageAllocated = new AtomicBoolean(false);
+
+ long partMetaPage = pageMem.acquirePage(grpId, partMetaId, metaPageAllocated);
+
+ if (metaPageAllocated.get())
+ grp.metrics().incrementInitializedLocalPartitions();
+
try {
boolean allocated = false;
boolean pendingTreeAllocated = false;
diff --git a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/pagemem/PageMemoryEx.java b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/pagemem/PageMemoryEx.java
index ce615ed..8bba4e1 100644
--- a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/pagemem/PageMemoryEx.java
+++ b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/pagemem/PageMemoryEx.java
@@ -18,6 +18,7 @@
package org.apache.ignite.internal.processors.cache.persistence.pagemem;
import java.nio.ByteBuffer;
+import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.ignite.IgniteCheckedException;
import org.apache.ignite.IgniteException;
import org.apache.ignite.internal.IgniteInternalFuture;
@@ -85,6 +86,18 @@ public interface PageMemoryEx extends PageMemory {
/**
* @see #acquirePage(int, long)
+ * Sets additional flag indicating that page was not found in memory and had to be allocated.
+ *
+ * @param grpId Cache group ID.
+ * @param pageId Page ID.
+ * @param pageAllocated Flag is set to {@code true} if new page was allocated in offheap memory.
+ * @return Page.
+ * @throws IgniteCheckedException If failed.
+ */
+ public long acquirePage(int grpId, long pageId, AtomicBoolean pageAllocated) throws IgniteCheckedException;
+
+ /**
+ * @see #acquirePage(int, long)
* Will read page from file if it is not present in memory
*
* @param grpId Cache group ID.
diff --git a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/pagemem/PageMemoryImpl.java b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/pagemem/PageMemoryImpl.java
index 1af5032..d2a8562 100755
--- a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/pagemem/PageMemoryImpl.java
+++ b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/pagemem/PageMemoryImpl.java
@@ -680,8 +680,25 @@ public class PageMemoryImpl implements PageMemoryEx {
}
/** {@inheritDoc} */
+ @Override public long acquirePage(int grpId, long pageId, AtomicBoolean pageAllocated) throws IgniteCheckedException {
+ // Delegates with the no-op I/O statistics holder and restore == false.
+ return acquirePage(grpId, pageId, IoStatisticsHolderNoOp.INSTANCE, false, pageAllocated);
+ }
+
+ /** {@inheritDoc} */
@Override public long acquirePage(int grpId, long pageId, IoStatisticsHolder statHolder,
boolean restore) throws IgniteCheckedException {
+ // This overload has no allocation flag to report to, hence null is passed.
+ return acquirePage(grpId, pageId, statHolder, restore, null);
+ }
+
+ /**
+ * @param grpId Group id.
+ * @param pageId Page id.
+ * @param statHolder Stat holder.
+ * @param restore Restore.
+ * @param pageAllocated Page allocated.
+ */
+ private long acquirePage(int grpId, long pageId, IoStatisticsHolder statHolder,
+ boolean restore, @Nullable AtomicBoolean pageAllocated) throws IgniteCheckedException {
assert started;
FullPageId fullId = new FullPageId(pageId, grpId);
@@ -739,6 +756,9 @@ public class PageMemoryImpl implements PageMemoryEx {
if (relPtr == INVALID_REL_PTR) {
relPtr = seg.borrowOrAllocateFreePage(pageId);
+ if (pageAllocated != null)
+ pageAllocated.set(true);
+
if (relPtr == INVALID_REL_PTR)
relPtr = seg.removePageForReplacement(delayedWriter == null ? flushDirtyPage : delayedWriter);
diff --git a/modules/core/src/main/java/org/apache/ignite/internal/util/IgniteUtils.java b/modules/core/src/main/java/org/apache/ignite/internal/util/IgniteUtils.java
index e11a896..c24b637 100755
--- a/modules/core/src/main/java/org/apache/ignite/internal/util/IgniteUtils.java
+++ b/modules/core/src/main/java/org/apache/ignite/internal/util/IgniteUtils.java
@@ -299,6 +299,9 @@ import static org.apache.ignite.internal.util.GridUnsafe.staticFieldOffset;
@SuppressWarnings({"UnusedReturnValue", "RedundantStringConstructorCall"})
public abstract class IgniteUtils {
/** */
+ public static final long MB = 1024L * 1024;
+
+ /** */
public static final long GB = 1024L * 1024 * 1024;
/** Minimum checkpointing page buffer size (may be adjusted by Ignite). */
@@ -3670,6 +3673,16 @@ public abstract class IgniteUtils {
}
/**
+ * Converts size in bytes to human-readable size in whole megabytes ({@link #MB} bytes each).
+ * The fractional part is discarded by integer division.
+ *
+ * @param sizeInBytes Size of any object (file, memory region etc) in bytes.
+ * @return Size converted to megabytes, narrowed to {@code int}.
+ */
+ public static int sizeInMegabytes(long sizeInBytes) {
+ return (int)(sizeInBytes / MB);
+ }
+
+ /**
* Deletes file or directory with all sub-directories and files.
*
* @param path File or directory to delete.
@@ -5426,7 +5439,6 @@ public abstract class IgniteUtils {
return map;
}
-
/**
* Calculate a hashCode for an array.
*
diff --git a/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/CacheDataRegionConfigurationTest.java b/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/CacheDataRegionConfigurationTest.java
index 887679a..2754390 100644
--- a/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/CacheDataRegionConfigurationTest.java
+++ b/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/CacheDataRegionConfigurationTest.java
@@ -14,11 +14,16 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
package org.apache.ignite.internal.processors.cache;
import java.util.concurrent.Callable;
import org.apache.ignite.IgniteCache;
import org.apache.ignite.IgniteCheckedException;
+import org.apache.ignite.IgniteLogger;
+import org.apache.ignite.cache.CacheMode;
+import org.apache.ignite.cache.affinity.rendezvous.RendezvousAffinityFunction;
+import org.apache.ignite.cluster.ClusterNode;
import org.apache.ignite.configuration.CacheConfiguration;
import org.apache.ignite.configuration.DataPageEvictionMode;
import org.apache.ignite.configuration.DataRegionConfiguration;
@@ -26,11 +31,17 @@ import org.apache.ignite.configuration.DataStorageConfiguration;
import org.apache.ignite.configuration.IgniteConfiguration;
import org.apache.ignite.internal.IgniteEx;
import org.apache.ignite.internal.mem.IgniteOutOfMemoryException;
+import org.apache.ignite.internal.util.typedef.internal.U;
+import org.apache.ignite.lang.IgnitePredicate;
import org.apache.ignite.testframework.GridTestUtils;
+import org.apache.ignite.testframework.ListeningTestLogger;
+import org.apache.ignite.testframework.LogListener;
import org.apache.ignite.testframework.junits.common.GridCommonAbstractTest;
import org.jetbrains.annotations.Nullable;
import org.junit.Test;
+import static org.apache.ignite.configuration.DataStorageConfiguration.DFLT_PAGE_SIZE;
+
/**
*
*/
@@ -42,6 +53,9 @@ public class CacheDataRegionConfigurationTest extends GridCommonAbstractTest {
private volatile DataStorageConfiguration memCfg;
/** */
+ private IgniteLogger logger;
+
+ /** */
private static final long DFLT_MEM_PLC_SIZE = 10L * 1024 * 1024;
/** */
@@ -51,6 +65,15 @@ public class CacheDataRegionConfigurationTest extends GridCommonAbstractTest {
@Override protected IgniteConfiguration getConfiguration(String gridName) throws Exception {
IgniteConfiguration cfg = super.getConfiguration(gridName);
+ if (logger != null)
+ cfg.setGridLogger(logger);
+
+ if (gridName.contains("client")) {
+ cfg.setClientMode(true);
+
+ return cfg;
+ }
+
if (memCfg != null)
cfg.setDataStorageConfiguration(memCfg);
@@ -63,6 +86,14 @@ public class CacheDataRegionConfigurationTest extends GridCommonAbstractTest {
/** {@inheritDoc} */
@Override protected void afterTest() throws Exception {
stopAllGrids();
+
+ // Some tests enable persistence, so files written to disk have to be removed as well.
+ cleanPersistenceDir();
+ }
+
+ /** {@inheritDoc} */
+ @Override protected void beforeTest() throws Exception {
+ // Start from a clean state: no running grids and no persistence files.
+ stopAllGrids();
+
+ cleanPersistenceDir();
}
/** */
@@ -203,6 +234,280 @@ public class CacheDataRegionConfigurationTest extends GridCommonAbstractTest {
}
/**
+ * Filter to exclude the node from affinity nodes by its name.
+ */
+ private static class NodeNameNodeFilter implements IgnitePredicate<ClusterNode> {
+ /** Name (or a part of the name) of the node to be filtered out. */
+ private final String filteredNode;
+
+ /**
+ * @param node Name (or a part of the name) of the node to be filtered out.
+ */
+ private NodeNameNodeFilter(String node) {
+ filteredNode = node;
+ }
+
+ /**
+ * {@inheritDoc}
+ *
+ * @return {@code false} if the value of the node's {@code org.apache.ignite.ignite.name} attribute contains
+ * the filtered name, {@code true} otherwise.
+ */
+ @Override public boolean apply(ClusterNode node) {
+ // NOTE(review): toString() would throw NPE if the attribute is absent on the node - confirm it is always set.
+ return !node.attribute("org.apache.ignite.ignite.name").toString().contains(filteredNode);
+ }
+ }
+
+    /**
+     * Verifies that warning message is printed to the logs if user tries to start a static cache in a data region
+     * where cache overhead (e.g. metapages for partitions) occupies more space of the region than a defined
+     * threshold (15%).
+     *
+     * @throws Exception If failed.
+     */
+    @Test
+    public void testWarningIfStaticCacheOverheadExceedsThreshold() throws Exception {
+        DataRegionConfiguration smallRegionCfg = new DataRegionConfiguration();
+        int numOfPartitions = 512;
+        // Derived from numOfPartitions so that the expected message follows the configured partition count.
+        int partitionsMetaMemoryChunk = U.sizeInMegabytes(numOfPartitions * DFLT_PAGE_SIZE);
+
+        smallRegionCfg.setInitialSize(DFLT_MEM_PLC_SIZE);
+        smallRegionCfg.setMaxSize(DFLT_MEM_PLC_SIZE);
+        smallRegionCfg.setPersistenceEnabled(true);
+        smallRegionCfg.setName("smallRegion");
+
+        memCfg = new DataStorageConfiguration();
+        memCfg.setDefaultDataRegionConfiguration(smallRegionCfg);
+        //one hour to guarantee that checkpoint will be triggered by 'dirty pages amount' trigger
+        memCfg.setCheckpointFrequency(60 * 60 * 1000);
+
+        CacheConfiguration<Object, Object> manyPartitionsCache = new CacheConfiguration<>(DEFAULT_CACHE_NAME);
+
+        //512 partitions exceed the threshold only if both primary and backup partitions are counted
+        manyPartitionsCache.setAffinity(new RendezvousAffinityFunction(false, numOfPartitions));
+        manyPartitionsCache.setBackups(1);
+
+        ccfg = manyPartitionsCache;
+
+        ListeningTestLogger srv0Logger = new ListeningTestLogger(false, null);
+        LogListener cacheGrpLsnr0 = LogListener.matches("Cache group 'default' brings high overhead").build();
+        LogListener dataRegLsnr0 = LogListener.matches("metainformation in data region 'smallRegion'").build();
+        LogListener partsInfoLsnr0 = LogListener.matches(numOfPartitions + " partitions, " +
+            DFLT_PAGE_SIZE +
+            " bytes per partition, " + partitionsMetaMemoryChunk + " MBs total").build();
+        srv0Logger.registerAllListeners(cacheGrpLsnr0, dataRegLsnr0, partsInfoLsnr0);
+        // getConfiguration() reads this field, so it has to be set right before the node is started.
+        logger = srv0Logger;
+
+        IgniteEx ignite0 = startGrid("srv0");
+
+        ListeningTestLogger srv1Logger = new ListeningTestLogger(false, null);
+        LogListener cacheGrpLsnr1 = LogListener.matches("Cache group 'default' brings high overhead").build();
+        LogListener dataRegLsnr1 = LogListener.matches("metainformation in data region 'smallRegion'").build();
+        LogListener partsInfoLsnr1 = LogListener.matches(numOfPartitions + " partitions, " +
+            DFLT_PAGE_SIZE +
+            " bytes per partition, " + partitionsMetaMemoryChunk + " MBs total").build();
+        srv1Logger.registerAllListeners(cacheGrpLsnr1, dataRegLsnr1, partsInfoLsnr1);
+        logger = srv1Logger;
+
+        startGrid("srv1");
+
+        ignite0.cluster().active(true);
+
+        //srv0 and srv1 print warning into the log as the threshold for cache in default cache group is broken
+        assertTrue(cacheGrpLsnr0.check());
+        assertTrue(dataRegLsnr0.check());
+        assertTrue(partsInfoLsnr0.check());
+
+        assertTrue(cacheGrpLsnr1.check());
+        assertTrue(dataRegLsnr1.check());
+        assertTrue(partsInfoLsnr1.check());
+    }
+
+    /**
+     * Verifies that warning message is printed to the logs if user tries to start a dynamic cache in a data region
+     * where cache overhead (e.g. metapages for partitions) occupies more space of the region than a defined
+     * threshold. A node excluded from the cache by node filter must not print the warning.
+     *
+     * @throws Exception If failed.
+     */
+    @Test
+    public void testWarningIfDynamicCacheOverheadExceedsThreshold() throws Exception {
+        String filteredSrvName = "srv2";
+        int numOfPartitions = 512;
+        // Derived from numOfPartitions so that the expected message follows the configured partition count.
+        int partitionsMetaMemoryChunk = U.sizeInMegabytes(numOfPartitions * DFLT_PAGE_SIZE);
+
+        DataRegionConfiguration smallRegionCfg = new DataRegionConfiguration();
+
+        smallRegionCfg.setName("smallRegion");
+        smallRegionCfg.setInitialSize(DFLT_MEM_PLC_SIZE);
+        smallRegionCfg.setMaxSize(DFLT_MEM_PLC_SIZE);
+        smallRegionCfg.setPersistenceEnabled(true);
+
+        //explicit default data region configuration to test possible NPE case
+        DataRegionConfiguration defaultRegionCfg = new DataRegionConfiguration();
+        defaultRegionCfg.setName("defaultRegion");
+        defaultRegionCfg.setInitialSize(DFLT_MEM_PLC_SIZE);
+        defaultRegionCfg.setMaxSize(DFLT_MEM_PLC_SIZE);
+        defaultRegionCfg.setPersistenceEnabled(true);
+
+        memCfg = new DataStorageConfiguration();
+        memCfg.setDefaultDataRegionConfiguration(defaultRegionCfg);
+        memCfg.setDataRegionConfigurations(smallRegionCfg);
+        //one hour to guarantee that checkpoint will be triggered by 'dirty pages amount' trigger
+        memCfg.setCheckpointFrequency(60 * 60 * 1000);
+
+        ListeningTestLogger srv0Logger = new ListeningTestLogger(false, null);
+        LogListener cacheGrpLsnr0 = LogListener.matches("Cache group 'default' brings high overhead").build();
+        LogListener dataRegLsnr0 = LogListener.matches("metainformation in data region 'defaultRegion'").build();
+        LogListener partsInfoLsnr0 = LogListener.matches(numOfPartitions + " partitions, " +
+            DFLT_PAGE_SIZE +
+            " bytes per partition, " + partitionsMetaMemoryChunk + " MBs total").build();
+        srv0Logger.registerAllListeners(cacheGrpLsnr0, dataRegLsnr0, partsInfoLsnr0);
+        // getConfiguration() reads this field, so it has to be set right before the node is started.
+        logger = srv0Logger;
+
+        IgniteEx ignite0 = startGrid("srv0");
+
+        ListeningTestLogger srv1Logger = new ListeningTestLogger(false, null);
+        LogListener cacheGrpLsnr1 = LogListener.matches("Cache group 'default' brings high overhead").build();
+        LogListener dataRegLsnr1 = LogListener.matches("metainformation in data region 'defaultRegion'").build();
+        LogListener partsInfoLsnr1 = LogListener.matches(numOfPartitions + " partitions, " +
+            DFLT_PAGE_SIZE +
+            " bytes per partition, " + partitionsMetaMemoryChunk + " MBs total").build();
+        srv1Logger.registerAllListeners(cacheGrpLsnr1, dataRegLsnr1, partsInfoLsnr1);
+        logger = srv1Logger;
+
+        startGrid("srv1");
+
+        ListeningTestLogger srv2Logger = new ListeningTestLogger(false, null);
+        LogListener cacheGrpLsnr2 = LogListener.matches("Cache group 'default' brings high overhead").build();
+        srv2Logger.registerListener(cacheGrpLsnr2);
+        logger = srv2Logger;
+
+        startGrid("srv2");
+
+        ignite0.cluster().active(true);
+
+        IgniteEx cl = startGrid("client01");
+
+        CacheConfiguration<Object, Object> manyPartitionsCache = new CacheConfiguration<>(DEFAULT_CACHE_NAME);
+
+        manyPartitionsCache.setAffinity(new RendezvousAffinityFunction(false, numOfPartitions));
+        manyPartitionsCache.setNodeFilter(new NodeNameNodeFilter(filteredSrvName));
+        manyPartitionsCache.setBackups(1);
+
+        cl.createCache(manyPartitionsCache);
+
+        //srv0 and srv1 print warning into the log as the threshold for cache in default cache group is broken
+        assertTrue(cacheGrpLsnr0.check());
+        assertTrue(dataRegLsnr0.check());
+        assertTrue(partsInfoLsnr0.check());
+
+        assertTrue(cacheGrpLsnr1.check());
+        assertTrue(dataRegLsnr1.check());
+        assertTrue(partsInfoLsnr1.check());
+
+        //srv2 doesn't print the warning as it is filtered by node filter from affinity nodes
+        assertFalse(cacheGrpLsnr2.check());
+    }
+
+ /**
+ * Verifies that warning is printed out to logs if after removing nodes from baseline
+ * some caches reach or cross dangerous limit of metainformation overhead per data region.
+ *
+ * @throws Exception If failed.
+ */
+ @Test
+ public void testWarningOnBaselineTopologyChange() throws Exception {
+ DataRegionConfiguration defaultRegionCfg = new DataRegionConfiguration();
+ defaultRegionCfg.setInitialSize(DFLT_MEM_PLC_SIZE);
+ defaultRegionCfg.setMaxSize(DFLT_MEM_PLC_SIZE);
+ defaultRegionCfg.setPersistenceEnabled(true);
+
+ memCfg = new DataStorageConfiguration();
+ memCfg.setDefaultDataRegionConfiguration(defaultRegionCfg);
+ //one hour to guarantee that checkpoint will be triggered by 'dirty pages amount' trigger
+ memCfg.setCheckpointFrequency(60 * 60 * 1000);
+
+ ListeningTestLogger srv0Logger = new ListeningTestLogger(false, null);
+ LogListener cacheGrpLsnr0 = LogListener.matches("Cache group 'default' brings high overhead").build();
+ srv0Logger.registerListener(cacheGrpLsnr0);
+ // getConfiguration() reads this field, so it has to be set right before the node is started.
+ logger = srv0Logger;
+
+ IgniteEx ignite0 = startGrid("srv0");
+
+ ListeningTestLogger srv1Logger = new ListeningTestLogger(false, null);
+ LogListener cacheGrpLsnr1 = LogListener.matches("Cache group 'default' brings high overhead").build();
+ srv1Logger.registerListener(cacheGrpLsnr1);
+ logger = srv1Logger;
+
+ startGrid("srv1");
+
+ ignite0.cluster().active(true);
+
+ ignite0.createCache(
+ new CacheConfiguration<>(DEFAULT_CACHE_NAME)
+ .setDataRegionName(defaultRegionCfg.getName())
+ .setCacheMode(CacheMode.PARTITIONED)
+ .setAffinity(new RendezvousAffinityFunction(false, 512))
+ );
+
+ // While both servers are in the baseline, no warning is expected on either of them.
+ assertFalse(cacheGrpLsnr0.check());
+ assertFalse(cacheGrpLsnr1.check());
+
+ stopGrid("srv1");
+
+ // Baseline is reset manually to the current topology (srv0 only), so auto adjust is switched off first.
+ ignite0.cluster().baselineAutoAdjustEnabled(false);
+
+ long topVer = ignite0.cluster().topologyVersion();
+
+ ignite0.cluster().setBaselineTopology(topVer);
+
+ awaitPartitionMapExchange();
+
+ // Presumably srv0 is now assigned all 512 partitions, which pushes the overhead above the threshold.
+ assertTrue(cacheGrpLsnr0.check());
+ }
+
+ /**
+ * Negative test: verifies that no warning is printed to logs if user starts static and dynamic caches
+ * in data region with enough capacity to host these caches;
+ * in other words, no thresholds for metapages ratio are broken.
+ *
+ * @throws Exception If failed.
+ */
+ @Test
+ public void testNoWarningIfCacheConfigurationDoesntBreakThreshold() throws Exception {
+ DataRegionConfiguration defaultRegionCfg = new DataRegionConfiguration();
+ defaultRegionCfg.setInitialSize(DFLT_MEM_PLC_SIZE);
+ defaultRegionCfg.setMaxSize(DFLT_MEM_PLC_SIZE);
+ defaultRegionCfg.setPersistenceEnabled(true);
+
+ memCfg = new DataStorageConfiguration();
+ memCfg.setDefaultDataRegionConfiguration(defaultRegionCfg);
+ //one hour to guarantee that checkpoint will be triggered by 'dirty pages amount' trigger
+ memCfg.setCheckpointFrequency(60 * 60 * 1000);
+
+ CacheConfiguration<Object, Object> fewPartitionsCache = new CacheConfiguration<>(DEFAULT_CACHE_NAME);
+
+ //16 partitions keep the metapages overhead below the threshold even when backups are counted
+ fewPartitionsCache.setAffinity(new RendezvousAffinityFunction(false, 16));
+ fewPartitionsCache.setBackups(1);
+
+ ccfg = fewPartitionsCache;
+
+ ListeningTestLogger srv0Logger = new ListeningTestLogger(false, null);
+ LogListener cacheGrpLsnr0 = LogListener.matches("Cache group 'default' brings high overhead").build();
+ LogListener dynamicGrpLsnr = LogListener.matches("Cache group 'dynamicCache' brings high overhead").build();
+ srv0Logger.registerListener(cacheGrpLsnr0);
+ srv0Logger.registerListener(dynamicGrpLsnr);
+ // getConfiguration() reads this field, so it has to be set right before the node is started.
+ logger = srv0Logger;
+
+ IgniteEx ignite0 = startGrid("srv0");
+
+ ignite0.cluster().active(true);
+
+ // Static cache: no warning after activation.
+ assertFalse(cacheGrpLsnr0.check());
+
+ ignite0.createCache(new CacheConfiguration<>("dynamicCache")
+ .setAffinity(new RendezvousAffinityFunction(false, 16))
+ );
+
+ // Dynamic cache: no warning after creation.
+ assertFalse(dynamicGrpLsnr.check());
+ }
+
+ /**
* Verifies that {@link IgniteCheckedException} is thrown when page eviction threshold is less than 0.5.
*/
@Test
diff --git a/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/CacheGroupMetricsTest.java b/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/CacheGroupMetricsTest.java
index b0b5da4..eac1ab1 100644
--- a/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/CacheGroupMetricsTest.java
+++ b/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/CacheGroupMetricsTest.java
@@ -47,6 +47,7 @@ import org.apache.ignite.events.EventType;
import org.apache.ignite.internal.IgniteEx;
import org.apache.ignite.internal.processors.metric.GridMetricManager;
import org.apache.ignite.internal.processors.metric.MetricRegistry;
+import org.apache.ignite.internal.processors.metric.impl.AtomicLongMetric;
import org.apache.ignite.internal.util.lang.GridAbsPredicate;
import org.apache.ignite.internal.util.typedef.F;
import org.apache.ignite.internal.util.typedef.T2;
@@ -415,4 +416,34 @@ public class CacheGroupMetricsTest extends GridCommonAbstractTest implements Ser
mxBean0Grp2.get2().<LongMetric>findMetric("TotalAllocatedPages").value() +
mxBean0Grp3.get2().<LongMetric>findMetric("TotalAllocatedPages").value());
}
+
+    /**
+     * Verifies metric for initialized local partitions.
+     * It is incremented when partition is actually created on node and decremented when it is destroyed.
+     *
+     * @throws Exception If failed.
+     */
+    @Test
+    public void testInitializedLocalPartitions() throws Exception {
+        pds = true;
+
+        cleanPersistenceDir();
+
+        IgniteEx ignite = startGrid(0);
+
+        ignite.cluster().active(true);
+
+        MetricRegistry group1Metrics = cacheGroupMetrics(0, "group1").get2();
+
+        AtomicLongMetric locPartsNum = group1Metrics.findMetric("InitializedLocalPartitionsNumber");
+
+        // No data has been written yet, so no partition is expected to be initialized.
+        assertEquals(0, locPartsNum.value());
+
+        // Parameterized type instead of the raw IgniteCache to keep put() calls type-checked.
+        IgniteCache<Integer, byte[]> cache = ignite.cache("cache1");
+
+        // Presumably keys 0..9 map to ten distinct partitions of 'cache1' - the assertion below relies on it.
+        for (int i = 0; i < 10; i++)
+            cache.put(i, new byte[100]);
+
+        assertEquals(10, locPartsNum.value());
+    }
}
diff --git a/modules/core/src/test/java/org/apache/ignite/testframework/ListeningTestLogger.java b/modules/core/src/test/java/org/apache/ignite/testframework/ListeningTestLogger.java
index 1b05f4c..54f3c4b 100644
--- a/modules/core/src/test/java/org/apache/ignite/testframework/ListeningTestLogger.java
+++ b/modules/core/src/test/java/org/apache/ignite/testframework/ListeningTestLogger.java
@@ -80,6 +80,19 @@ public class ListeningTestLogger implements IgniteLogger {
}
/**
+ * Resets each of the given message listeners and registers it in this logger, in the given order.
+ *
+ * @param lsnrs Message listeners.
+ */
+ public void registerAllListeners(@NotNull LogListener... lsnrs) {
+ for (LogListener lsnr : lsnrs) {
+ // Listener is reset before registration, as in the original one-by-one order.
+ lsnr.reset();
+
+ this.lsnrs.add(lsnr);
+ }
+ }
+
+ /**
* Registers message listener.
* <p>
* NOTE listener is executed in the thread causing the logging, so it is not recommended to throw any exceptions