You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ignite.apache.org by ag...@apache.org on 2020/01/23 10:55:29 UTC

[ignite] branch master updated: IGNITE-12439 Warning about possible OOME if cache with too many partitions is created in a limited-size data region

This is an automated email from the ASF dual-hosted git repository.

agura pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ignite.git


The following commit(s) were added to refs/heads/master by this push:
     new 3639f92  IGNITE-12439 Warning about possible OOME if cache with too many partitions is created in a limited-size data region
3639f92 is described below

commit 3639f92c7d3a50197ce90805215d63a52a9cda4f
Author: Sergey Chugunov <sc...@gridgain.com>
AuthorDate: Thu Jan 23 13:50:21 2020 +0300

    IGNITE-12439 Warning about possible OOME if cache with too many partitions is created in a limited-size data region
    
    Signed-off-by: Andrey Gura <ag...@apache.org>
---
 .../affinity/IdealAffinityAssignment.java          |   2 +-
 .../cache/CacheAffinitySharedManager.java          | 116 ++++++++
 .../processors/cache/CacheGroupMetricsImpl.java    |  19 ++
 .../GridCacheDatabaseSharedManager.java            |   2 +
 .../cache/persistence/GridCacheOffheapManager.java |   8 +-
 .../cache/persistence/pagemem/PageMemoryEx.java    |  13 +
 .../cache/persistence/pagemem/PageMemoryImpl.java  |  20 ++
 .../apache/ignite/internal/util/IgniteUtils.java   |  14 +-
 .../cache/CacheDataRegionConfigurationTest.java    | 305 +++++++++++++++++++++
 .../processors/cache/CacheGroupMetricsTest.java    |  31 +++
 .../ignite/testframework/ListeningTestLogger.java  |  13 +
 11 files changed, 540 insertions(+), 3 deletions(-)

diff --git a/modules/core/src/main/java/org/apache/ignite/internal/processors/affinity/IdealAffinityAssignment.java b/modules/core/src/main/java/org/apache/ignite/internal/processors/affinity/IdealAffinityAssignment.java
index c930ec5..58fb341 100644
--- a/modules/core/src/main/java/org/apache/ignite/internal/processors/affinity/IdealAffinityAssignment.java
+++ b/modules/core/src/main/java/org/apache/ignite/internal/processors/affinity/IdealAffinityAssignment.java
@@ -90,7 +90,7 @@ public class IdealAffinityAssignment {
      * @param nodes Nodes.
      * @param assignment Assignment.
      */
-    private static Map<Object, Set<Integer>> calculatePrimaries(
+    public static Map<Object, Set<Integer>> calculatePrimaries(
         @Nullable List<ClusterNode> nodes,
         List<List<ClusterNode>> assignment
     ) {
diff --git a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/CacheAffinitySharedManager.java b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/CacheAffinitySharedManager.java
index 2d8ce7c..46ca65f 100644
--- a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/CacheAffinitySharedManager.java
+++ b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/CacheAffinitySharedManager.java
@@ -18,6 +18,7 @@
 package org.apache.ignite.internal.processors.cache;
 
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.HashMap;
@@ -26,6 +27,7 @@ import java.util.Iterator;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.Optional;
 import java.util.Set;
 import java.util.UUID;
 import java.util.concurrent.ConcurrentHashMap;
@@ -38,6 +40,8 @@ import org.apache.ignite.IgniteSystemProperties;
 import org.apache.ignite.cache.affinity.AffinityFunction;
 import org.apache.ignite.cluster.ClusterNode;
 import org.apache.ignite.configuration.CacheConfiguration;
+import org.apache.ignite.configuration.DataRegionConfiguration;
+import org.apache.ignite.configuration.DataStorageConfiguration;
 import org.apache.ignite.configuration.NearCacheConfiguration;
 import org.apache.ignite.events.DiscoveryEvent;
 import org.apache.ignite.events.Event;
@@ -110,6 +114,9 @@ public class CacheAffinitySharedManager<K, V> extends GridCacheSharedManagerAdap
     /** Affinity information for all started caches (initialized on coordinator). */
     private ConcurrentMap<Integer, CacheGroupHolder> grpHolders = new ConcurrentHashMap<>();
 
+    /** */
+    private CacheMemoryOverheadValidator validator = new CacheMemoryOverheadValidator();
+
     /** Topology version which requires affinity re-calculation (set from discovery thread). */
     private AffinityTopologyVersion lastAffVer;
 
@@ -1023,6 +1030,8 @@ public class CacheAffinitySharedManager<K, V> extends GridCacheSharedManagerAdap
                 fut.timeBag().finishLocalStage("Affinity initialization on cache group start " +
                     "[grp=" + grpDesc.cacheOrGroupName() + "]");
 
+                validator.validateCacheGroup(grpDesc);
+
                 return null;
             }
         );
@@ -1903,6 +1912,8 @@ public class CacheAffinitySharedManager<K, V> extends GridCacheSharedManagerAdap
 
                 fut.timeBag().finishLocalStage("Affinity centralized initialization (crd) " +
                     "[grp=" + desc.cacheOrGroupName() + ", crd=" + crd + "]");
+
+                validator.validateCacheGroup(desc);
             }
         });
 
@@ -2868,4 +2879,109 @@ public class CacheAffinitySharedManager<K, V> extends GridCacheSharedManagerAdap
             return "WaitRebalanceInfo [topVer=" + topVer + ", grps=" + waitGrps + ']';
         }
     }
+
+    /**
+     * Validator for memory overhead of persistent caches.
+     *
+     * Persistent cache requires some overhead in data region memory, e.g. a metapage per partition created by
+     * the cache. If this overhead exceeds {@link #MEMORY_OVERHEAD_THRESHOLD} share of the region max size it may
+     * cause critical errors on node during checkpoint.
+     *
+     * Validator is intended to analyze cache group configuration and print warning to log to inform user about
+     * found problem.
+     */
+    class CacheMemoryOverheadValidator {
+        /** Share of data region max size which partition metapages may take before a warning is printed. */
+        private static final double MEMORY_OVERHEAD_THRESHOLD = 0.15;
+
+        /**
+         * Validates cache group configuration and prints warning if it violates the overhead limit.
+         *
+         * @param grpDesc Descriptor of cache group to validate.
+         */
+        void validateCacheGroup(CacheGroupDescriptor grpDesc) {
+            DataStorageConfiguration dsCfg = cctx.gridConfig().getDataStorageConfiguration();
+            CacheConfiguration<?, ?> grpCfg = grpDesc.config();
+
+            if (!CU.isPersistentCache(grpCfg, dsCfg) || CU.isSystemCache(grpDesc.cacheOrGroupName()))
+                return;
+
+            CacheGroupHolder grpHolder = grpHolders.get(grpDesc.groupId());
+
+            if (grpHolder == null)
+                return;
+
+            UUID locNodeId = cctx.localNodeId();
+            int partsNum = 0;
+
+            // Count every partition whose ideal assignment contains the local node.
+            for (List<ClusterNode> owners : grpHolder.aff.idealAssignment().assignment()) {
+                if (owners.stream().anyMatch(n -> n.id().equals(locNodeId)))
+                    partsNum++;
+            }
+
+            if (partsNum == 0)
+                return;
+
+            DataRegionConfiguration drCfg = findDataRegion(dsCfg, grpCfg.getDataRegionName());
+
+            if (drCfg == null)
+                return;
+
+            if ((1.0 * partsNum * dsCfg.getPageSize()) / drCfg.getMaxSize() > MEMORY_OVERHEAD_THRESHOLD)
+                log.warning(buildWarningMessage(grpDesc, drCfg, dsCfg.getPageSize(), partsNum));
+        }
+
+        /**
+         * Builds explanatory warning message.
+         *
+         * @param grpDesc Descriptor of cache group violating memory overhead threshold.
+         * @param drCfg Configuration of data region with not sufficient memory.
+         * @param pageSize Page size in bytes, taken as metainformation size of a single partition.
+         * @param partsNum Number of partitions of the group assigned to the local node.
+         * @return Warning message.
+         */
+        private String buildWarningMessage(CacheGroupDescriptor grpDesc, DataRegionConfiguration drCfg,
+            int pageSize, int partsNum) {
+            String msgFmt = "Cache group '%s'" +
+                " brings high overhead for its metainformation in data region '%s'." +
+                " Metainformation required for its partitions (%d partitions, %d bytes per partition, %d MBs total)" +
+                " will consume more than %d%% of data region memory (%d MBs)." +
+                " It may lead to critical errors on the node and cluster instability." +
+                " Please reduce number of partitions, add more memory to the data region" +
+                " or add more server nodes for this cache group.";
+
+            return String.format(
+                msgFmt,
+                grpDesc.cacheOrGroupName(),
+                drCfg.getName(),
+                partsNum,
+                pageSize,
+                U.sizeInMegabytes((long)partsNum * pageSize),
+                Math.round(MEMORY_OVERHEAD_THRESHOLD * 100),
+                U.sizeInMegabytes(drCfg.getMaxSize()));
+        }
+
+        /**
+         * Finds data region by name.
+         *
+         * @param dsCfg Data storage configuration.
+         * @param drName Data region name, {@code null} stands for the default data region.
+         *
+         * @return Found data region or {@code null} if no region with such name is configured.
+         */
+        @Nullable private DataRegionConfiguration findDataRegion(DataStorageConfiguration dsCfg, String drName) {
+            DataRegionConfiguration dfltCfg = dsCfg.getDefaultDataRegionConfiguration();
+            DataRegionConfiguration[] drCfgs = dsCfg.getDataRegionConfigurations();
+
+            if (drCfgs == null || drName == null || drName.equals(dfltCfg.getName()))
+                return dfltCfg;
+
+            Optional<DataRegionConfiguration> cfgOpt = Arrays.stream(drCfgs)
+                .filter(drCfg -> drName.equals(drCfg.getName()))
+                .findFirst();
+
+            return cfgOpt.orElse(null);
+        }
+    }
 }
diff --git a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/CacheGroupMetricsImpl.java b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/CacheGroupMetricsImpl.java
index b1394bc..51c00e6 100644
--- a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/CacheGroupMetricsImpl.java
+++ b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/CacheGroupMetricsImpl.java
@@ -69,6 +69,9 @@ public class CacheGroupMetricsImpl {
     /** */
     private final LongMetric sparseStorageSize;
 
+    /** Number of local partitions initialized on current node. */
+    private final AtomicLongMetric initLocalPartitionsNumber;
+
     /** Interface describing a predicate of two integers. */
     private interface IntBiPredicate {
         /**
@@ -105,6 +108,8 @@ public class CacheGroupMetricsImpl {
         idxBuildCntPartitionsLeft = mreg.longMetric("IndexBuildCountPartitionsLeft",
             "Number of partitions need processed for finished indexes create or rebuilding.");
 
+        initLocalPartitionsNumber = mreg.longMetric("InitializedLocalPartitionsNumber", "Number of local partitions initialized on current node.");
+
         DataRegion region = ctx.dataRegion();
 
         // On client node, region is null.
@@ -188,6 +193,20 @@ public class CacheGroupMetricsImpl {
         idxBuildCntPartitionsLeft.decrement();
     }
 
+    /**
+     * Increments the {@code InitializedLocalPartitionsNumber} metric: one more local partition got initialized.
+     */
+    public void incrementInitializedLocalPartitions() {
+        initLocalPartitionsNumber.increment();
+    }
+
+    /**
+     * Decrements the {@code InitializedLocalPartitionsNumber} metric: one local partition is no longer counted.
+     */
+    public void decrementInitializedLocalPartitions() {
+        initLocalPartitionsNumber.decrement();
+    }
+
     /** */
     public int getGroupId() {
         return ctx.groupId();
diff --git a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/GridCacheDatabaseSharedManager.java b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/GridCacheDatabaseSharedManager.java
index 4b6d317..8000f5f 100755
--- a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/GridCacheDatabaseSharedManager.java
+++ b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/GridCacheDatabaseSharedManager.java
@@ -3957,6 +3957,8 @@ public class GridCacheDatabaseSharedManager extends IgniteCacheDatabaseSharedMan
 
                         req.onDone(null);
 
+                        grp.metrics().decrementInitializedLocalPartitions();
+
                         if (log.isDebugEnabled())
                             log.debug("Partition file has destroyed [grpId=" + grpId + ", partId=" + partId + "]");
                     }
diff --git a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/GridCacheOffheapManager.java b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/GridCacheOffheapManager.java
index af7998e..3867a63 100644
--- a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/GridCacheOffheapManager.java
+++ b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/GridCacheOffheapManager.java
@@ -1891,7 +1891,13 @@ public class GridCacheOffheapManager extends IgniteCacheOffheapManagerImpl imple
             int grpId = grp.groupId();
             long partMetaId = pageMem.partitionMetaPageId(grpId, partId);
 
-            long partMetaPage = pageMem.acquirePage(grpId, partMetaId);
+            AtomicBoolean metaPageAllocated = new AtomicBoolean(false);
+
+            long partMetaPage = pageMem.acquirePage(grpId, partMetaId, metaPageAllocated);
+
+            if (metaPageAllocated.get())
+                grp.metrics().incrementInitializedLocalPartitions();
+
             try {
                 boolean allocated = false;
                 boolean pendingTreeAllocated = false;
diff --git a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/pagemem/PageMemoryEx.java b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/pagemem/PageMemoryEx.java
index ce615ed..8bba4e1 100644
--- a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/pagemem/PageMemoryEx.java
+++ b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/pagemem/PageMemoryEx.java
@@ -18,6 +18,7 @@
 package org.apache.ignite.internal.processors.cache.persistence.pagemem;
 
 import java.nio.ByteBuffer;
+import java.util.concurrent.atomic.AtomicBoolean;
 import org.apache.ignite.IgniteCheckedException;
 import org.apache.ignite.IgniteException;
 import org.apache.ignite.internal.IgniteInternalFuture;
@@ -85,6 +86,18 @@ public interface PageMemoryEx extends PageMemory {
 
     /**
      * @see #acquirePage(int, long)
+     * Additionally reports through the given flag that page was not found in memory and had to be allocated.
+     *
+     * @param grpId Cache group ID.
+     * @param pageId Page ID.
+     * @param pageAllocated Flag which is set to {@code true} if new page was allocated in offheap memory.
+     * @return Page.
+     * @throws IgniteCheckedException If failed.
+     */
+    public long acquirePage(int grpId, long pageId, AtomicBoolean pageAllocated) throws IgniteCheckedException;
+
+    /**
+     * @see #acquirePage(int, long)
      * Will read page from file if it is not present in memory
      *
      * @param grpId Cache group ID.
diff --git a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/pagemem/PageMemoryImpl.java b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/pagemem/PageMemoryImpl.java
index 1af5032..d2a8562 100755
--- a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/pagemem/PageMemoryImpl.java
+++ b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/pagemem/PageMemoryImpl.java
@@ -680,8 +680,25 @@ public class PageMemoryImpl implements PageMemoryEx {
     }
 
     /** {@inheritDoc} */
+    @Override public long acquirePage(int grpId, long pageId, AtomicBoolean pageAllocated) throws IgniteCheckedException {
+        return acquirePage(grpId, pageId, IoStatisticsHolderNoOp.INSTANCE, false, pageAllocated);
+    }
+
+    /** {@inheritDoc} */
     @Override public long acquirePage(int grpId, long pageId, IoStatisticsHolder statHolder,
         boolean restore) throws IgniteCheckedException {
+        return acquirePage(grpId, pageId, statHolder, restore, null);
+    }
+
+    /**
+     * @param grpId Group id.
+     * @param pageId Page id.
+     * @param statHolder Stat holder.
+     * @param restore Restore.
+     * @param pageAllocated Page allocated.
+     */
+    private long acquirePage(int grpId, long pageId, IoStatisticsHolder statHolder,
+        boolean restore, @Nullable AtomicBoolean pageAllocated) throws IgniteCheckedException {
         assert started;
 
         FullPageId fullId = new FullPageId(pageId, grpId);
@@ -739,6 +756,9 @@ public class PageMemoryImpl implements PageMemoryEx {
             if (relPtr == INVALID_REL_PTR) {
                 relPtr = seg.borrowOrAllocateFreePage(pageId);
 
+                if (pageAllocated != null)
+                    pageAllocated.set(true);
+
                 if (relPtr == INVALID_REL_PTR)
                     relPtr = seg.removePageForReplacement(delayedWriter == null ? flushDirtyPage : delayedWriter);
 
diff --git a/modules/core/src/main/java/org/apache/ignite/internal/util/IgniteUtils.java b/modules/core/src/main/java/org/apache/ignite/internal/util/IgniteUtils.java
index e11a896..c24b637 100755
--- a/modules/core/src/main/java/org/apache/ignite/internal/util/IgniteUtils.java
+++ b/modules/core/src/main/java/org/apache/ignite/internal/util/IgniteUtils.java
@@ -299,6 +299,9 @@ import static org.apache.ignite.internal.util.GridUnsafe.staticFieldOffset;
 @SuppressWarnings({"UnusedReturnValue", "RedundantStringConstructorCall"})
 public abstract class IgniteUtils {
     /** */
+    public static final long MB = 1024L * 1024;
+
+    /** */
     public static final long GB = 1024L * 1024 * 1024;
 
     /** Minimum checkpointing page buffer size (may be adjusted by Ignite). */
@@ -3670,6 +3673,16 @@ public abstract class IgniteUtils {
     }
 
     /**
+     * Converts size in bytes to human-readable number of whole megabytes (one megabyte is {@link #MB} bytes).
+     *
+     * @param sizeInBytes Size of any object (file, memory region etc) in bytes.
+     * @return Size in megabytes; fractional part is discarded and the result is narrowed to {@code int}.
+     */
+    public static int sizeInMegabytes(long sizeInBytes) {
+        return (int)(sizeInBytes / MB);
+    }
+
+    /**
      * Deletes file or directory with all sub-directories and files.
      *
      * @param path File or directory to delete.
@@ -5426,7 +5439,6 @@ public abstract class IgniteUtils {
         return map;
     }
 
-
     /**
      * Calculate a hashCode for an array.
      *
diff --git a/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/CacheDataRegionConfigurationTest.java b/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/CacheDataRegionConfigurationTest.java
index 887679a..2754390 100644
--- a/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/CacheDataRegionConfigurationTest.java
+++ b/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/CacheDataRegionConfigurationTest.java
@@ -14,11 +14,16 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+
 package org.apache.ignite.internal.processors.cache;
 
 import java.util.concurrent.Callable;
 import org.apache.ignite.IgniteCache;
 import org.apache.ignite.IgniteCheckedException;
+import org.apache.ignite.IgniteLogger;
+import org.apache.ignite.cache.CacheMode;
+import org.apache.ignite.cache.affinity.rendezvous.RendezvousAffinityFunction;
+import org.apache.ignite.cluster.ClusterNode;
 import org.apache.ignite.configuration.CacheConfiguration;
 import org.apache.ignite.configuration.DataPageEvictionMode;
 import org.apache.ignite.configuration.DataRegionConfiguration;
@@ -26,11 +31,17 @@ import org.apache.ignite.configuration.DataStorageConfiguration;
 import org.apache.ignite.configuration.IgniteConfiguration;
 import org.apache.ignite.internal.IgniteEx;
 import org.apache.ignite.internal.mem.IgniteOutOfMemoryException;
+import org.apache.ignite.internal.util.typedef.internal.U;
+import org.apache.ignite.lang.IgnitePredicate;
 import org.apache.ignite.testframework.GridTestUtils;
+import org.apache.ignite.testframework.ListeningTestLogger;
+import org.apache.ignite.testframework.LogListener;
 import org.apache.ignite.testframework.junits.common.GridCommonAbstractTest;
 import org.jetbrains.annotations.Nullable;
 import org.junit.Test;
 
+import static org.apache.ignite.configuration.DataStorageConfiguration.DFLT_PAGE_SIZE;
+
 /**
  *
  */
@@ -42,6 +53,9 @@ public class CacheDataRegionConfigurationTest extends GridCommonAbstractTest {
     private volatile DataStorageConfiguration memCfg;
 
     /** */
+    private IgniteLogger logger;
+
+    /** */
     private static final long DFLT_MEM_PLC_SIZE = 10L * 1024 * 1024;
 
     /** */
@@ -51,6 +65,15 @@ public class CacheDataRegionConfigurationTest extends GridCommonAbstractTest {
     @Override protected IgniteConfiguration getConfiguration(String gridName) throws Exception {
         IgniteConfiguration cfg = super.getConfiguration(gridName);
 
+        if (logger != null)
+            cfg.setGridLogger(logger);
+
+        if (gridName.contains("client")) {
+            cfg.setClientMode(true);
+
+            return cfg;
+        }
+
         if (memCfg != null)
             cfg.setDataStorageConfiguration(memCfg);
 
@@ -63,6 +86,14 @@ public class CacheDataRegionConfigurationTest extends GridCommonAbstractTest {
     /** {@inheritDoc} */
     @Override protected void afterTest() throws Exception {
         stopAllGrids();
+
+        cleanPersistenceDir();
+    }
+
+    @Override protected void beforeTest() throws Exception {
+        stopAllGrids();
+
+        cleanPersistenceDir();
     }
 
     /** */
@@ -203,6 +234,280 @@ public class CacheDataRegionConfigurationTest extends GridCommonAbstractTest {
     }
 
     /**
+     * Node filter which keeps a node out of cache affinity if its instance name contains the given string.
+     */
+    private static class NodeNameNodeFilter implements IgnitePredicate<ClusterNode> {
+        /** Instance name (or its part) of the node to be excluded. */
+        private final String filteredNode;
+
+        /** @param node Instance name (or its part) of the node to be excluded. */
+        private NodeNameNodeFilter(String node) {
+            filteredNode = node;
+        }
+
+        /** {@inheritDoc} */
+        @Override public boolean apply(ClusterNode node) {
+            String instanceName = node.attribute("org.apache.ignite.ignite.name").toString();
+
+            return !instanceName.contains(filteredNode);
+        }
+    }
+
+    /**
+     * Verifies that warning message is printed to the logs if user starts a static cache whose overhead
+     * (e.g. metapages for partitions) occupies more space of its data region than a defined threshold (15%).
+     *
+     * @throws Exception If failed.
+     */
+    @Test
+    public void testWarningIfStaticCacheOverheadExceedsThreshold() throws Exception {
+        DataRegionConfiguration smallRegionCfg = new DataRegionConfiguration();
+        int numOfPartitions = 512;
+        int partitionsMetaMemoryChunk = U.sizeInMegabytes(numOfPartitions * DFLT_PAGE_SIZE);
+
+        smallRegionCfg.setInitialSize(DFLT_MEM_PLC_SIZE);
+        smallRegionCfg.setMaxSize(DFLT_MEM_PLC_SIZE);
+        smallRegionCfg.setPersistenceEnabled(true);
+        smallRegionCfg.setName("smallRegion");
+
+        memCfg = new DataStorageConfiguration();
+        memCfg.setDefaultDataRegionConfiguration(smallRegionCfg);
+        //one hour to guarantee that checkpoint will be triggered by 'dirty pages amount' trigger
+        memCfg.setCheckpointFrequency(60 * 60 * 1000);
+
+        CacheConfiguration<Object, Object> manyPartitionsCache = new CacheConfiguration<>(DEFAULT_CACHE_NAME);
+
+        //512 partitions exceed the threshold only if both primary and backup partitions are counted
+        manyPartitionsCache.setAffinity(new RendezvousAffinityFunction(false, numOfPartitions));
+        manyPartitionsCache.setBackups(1);
+
+        ccfg = manyPartitionsCache;
+
+        ListeningTestLogger srv0Logger = new ListeningTestLogger(false, null);
+        LogListener cacheGrpLsnr0 = LogListener.matches("Cache group 'default' brings high overhead").build();
+        LogListener dataRegLsnr0 = LogListener.matches("metainformation in data region 'smallRegion'").build();
+        LogListener partsInfoLsnr0 = LogListener.matches(numOfPartitions + " partitions, " +
+            DFLT_PAGE_SIZE +
+            " bytes per partition, " + partitionsMetaMemoryChunk + " MBs total").build();
+        srv0Logger.registerAllListeners(cacheGrpLsnr0, dataRegLsnr0, partsInfoLsnr0);
+        logger = srv0Logger;
+
+        IgniteEx ignite0 = startGrid("srv0");
+
+        ListeningTestLogger srv1Logger = new ListeningTestLogger(false, null);
+        LogListener cacheGrpLsnr1 = LogListener.matches("Cache group 'default' brings high overhead").build();
+        LogListener dataRegLsnr1 = LogListener.matches("metainformation in data region 'smallRegion'").build();
+        LogListener partsInfoLsnr1 = LogListener.matches(numOfPartitions + " partitions, " +
+            DFLT_PAGE_SIZE +
+            " bytes per partition, " + partitionsMetaMemoryChunk + " MBs total").build();
+        srv1Logger.registerAllListeners(cacheGrpLsnr1, dataRegLsnr1, partsInfoLsnr1);
+        logger = srv1Logger;
+
+        startGrid("srv1");
+
+        ignite0.cluster().active(true);
+
+        //srv0 and srv1 print warning into the log as the threshold for cache in default cache group is broken
+        assertTrue(cacheGrpLsnr0.check());
+        assertTrue(dataRegLsnr0.check());
+        assertTrue(partsInfoLsnr0.check());
+
+        assertTrue(cacheGrpLsnr1.check());
+        assertTrue(dataRegLsnr1.check());
+        assertTrue(partsInfoLsnr1.check());
+    }
+
+    /**
+     * Verifies that warning message is printed to the logs if user starts a dynamic cache whose overhead
+     * (e.g. metapages for partitions) occupies more space of its data region than a defined threshold.
+     *
+     * @throws Exception If failed.
+     */
+    @Test
+    public void testWarningIfDynamicCacheOverheadExceedsThreshold() throws Exception {
+        String filteredSrvName = "srv2";
+        int numOfPartitions = 512;
+        int partitionsMetaMemoryChunk = U.sizeInMegabytes(numOfPartitions * DFLT_PAGE_SIZE);
+
+        DataRegionConfiguration smallRegionCfg = new DataRegionConfiguration();
+
+        smallRegionCfg.setName("smallRegion");
+        smallRegionCfg.setInitialSize(DFLT_MEM_PLC_SIZE);
+        smallRegionCfg.setMaxSize(DFLT_MEM_PLC_SIZE);
+        smallRegionCfg.setPersistenceEnabled(true);
+
+        //explicit default data region configuration to test possible NPE case
+        DataRegionConfiguration defaultRegionCfg = new DataRegionConfiguration();
+        defaultRegionCfg.setName("defaultRegion");
+        defaultRegionCfg.setInitialSize(DFLT_MEM_PLC_SIZE);
+        defaultRegionCfg.setMaxSize(DFLT_MEM_PLC_SIZE);
+        defaultRegionCfg.setPersistenceEnabled(true);
+
+        memCfg = new DataStorageConfiguration();
+        memCfg.setDefaultDataRegionConfiguration(defaultRegionCfg);
+        memCfg.setDataRegionConfigurations(smallRegionCfg);
+        //one hour to guarantee that checkpoint will be triggered by 'dirty pages amount' trigger
+        memCfg.setCheckpointFrequency(60 * 60 * 1000);
+
+        ListeningTestLogger srv0Logger = new ListeningTestLogger(false, null);
+        LogListener cacheGrpLsnr0 = LogListener.matches("Cache group 'default' brings high overhead").build();
+        LogListener dataRegLsnr0 = LogListener.matches("metainformation in data region 'defaultRegion'").build();
+        LogListener partsInfoLsnr0 = LogListener.matches(numOfPartitions + " partitions, " +
+            DFLT_PAGE_SIZE +
+            " bytes per partition, " + partitionsMetaMemoryChunk + " MBs total").build();
+        srv0Logger.registerAllListeners(cacheGrpLsnr0, dataRegLsnr0, partsInfoLsnr0);
+        logger = srv0Logger;
+
+        IgniteEx ignite0 = startGrid("srv0");
+
+        ListeningTestLogger srv1Logger = new ListeningTestLogger(false, null);
+        LogListener cacheGrpLsnr1 = LogListener.matches("Cache group 'default' brings high overhead").build();
+        LogListener dataRegLsnr1 = LogListener.matches("metainformation in data region 'defaultRegion'").build();
+        LogListener partsInfoLsnr1 = LogListener.matches(numOfPartitions + " partitions, " +
+            DFLT_PAGE_SIZE +
+            " bytes per partition, " + partitionsMetaMemoryChunk + " MBs total").build();
+        srv1Logger.registerAllListeners(cacheGrpLsnr1, dataRegLsnr1, partsInfoLsnr1);
+        logger = srv1Logger;
+
+        startGrid("srv1");
+
+        ListeningTestLogger srv2Logger = new ListeningTestLogger(false, null);
+        LogListener cacheGrpLsnr2 = LogListener.matches("Cache group 'default' brings high overhead").build();
+        srv2Logger.registerListener(cacheGrpLsnr2);
+        logger = srv2Logger;
+
+        startGrid("srv2");
+
+        ignite0.cluster().active(true);
+
+        IgniteEx cl = startGrid("client01");
+
+        CacheConfiguration<Object, Object> manyPartitionsCache = new CacheConfiguration<>(DEFAULT_CACHE_NAME);
+
+        manyPartitionsCache.setAffinity(new RendezvousAffinityFunction(false, numOfPartitions));
+        manyPartitionsCache.setNodeFilter(new NodeNameNodeFilter(filteredSrvName));
+        manyPartitionsCache.setBackups(1);
+
+        cl.createCache(manyPartitionsCache);
+
+        //srv0 and srv1 print warning into the log as the threshold for cache in default cache group is broken
+        assertTrue(cacheGrpLsnr0.check());
+        assertTrue(dataRegLsnr0.check());
+        assertTrue(partsInfoLsnr0.check());
+
+        assertTrue(cacheGrpLsnr1.check());
+        assertTrue(dataRegLsnr1.check());
+        assertTrue(partsInfoLsnr1.check());
+
+        //srv2 doesn't print the warning as it is filtered by node filter from affinity nodes
+        assertFalse(cacheGrpLsnr2.check());
+    }
+
+    /**
+     * Verifies that warning is printed out to logs if, after a node is removed from baseline, the cache
+     * reaches or crosses dangerous limit of metainformation overhead in data region of the remaining node.
+     *
+     * @throws Exception If failed.
+     */
+    @Test
+    public void testWarningOnBaselineTopologyChange() throws Exception {
+        DataRegionConfiguration defaultRegionCfg = new DataRegionConfiguration();
+        defaultRegionCfg.setInitialSize(DFLT_MEM_PLC_SIZE);
+        defaultRegionCfg.setMaxSize(DFLT_MEM_PLC_SIZE);
+        defaultRegionCfg.setPersistenceEnabled(true);
+
+        memCfg = new DataStorageConfiguration();
+        memCfg.setDefaultDataRegionConfiguration(defaultRegionCfg);
+        //one hour to guarantee that checkpoint will be triggered by 'dirty pages amount' trigger
+        memCfg.setCheckpointFrequency(60 * 60 * 1000);
+
+        ListeningTestLogger srv0Logger = new ListeningTestLogger(false, null);
+        LogListener cacheGrpLsnr0 = LogListener.matches("Cache group 'default' brings high overhead").build();
+        srv0Logger.registerListener(cacheGrpLsnr0);
+        logger = srv0Logger;
+
+        IgniteEx ignite0 = startGrid("srv0");
+
+        ListeningTestLogger srv1Logger = new ListeningTestLogger(false, null);
+        LogListener cacheGrpLsnr1 = LogListener.matches("Cache group 'default' brings high overhead").build();
+        srv1Logger.registerListener(cacheGrpLsnr1);
+        logger = srv1Logger;
+
+        startGrid("srv1");
+
+        ignite0.cluster().active(true);
+
+        ignite0.createCache(
+            new CacheConfiguration<>(DEFAULT_CACHE_NAME)
+                .setDataRegionName(defaultRegionCfg.getName())
+                .setCacheMode(CacheMode.PARTITIONED)
+                .setAffinity(new RendezvousAffinityFunction(false, 512))
+        );
+
+        assertFalse(cacheGrpLsnr0.check());
+        assertFalse(cacheGrpLsnr1.check());
+
+        stopGrid("srv1");
+
+        ignite0.cluster().baselineAutoAdjustEnabled(false);
+
+        long topVer = ignite0.cluster().topologyVersion();
+
+        ignite0.cluster().setBaselineTopology(topVer);
+
+        awaitPartitionMapExchange();
+
+        assertTrue(cacheGrpLsnr0.check());
+    }
+
+    /**
+     * Negative test: verifies that no warning is printed to logs if user starts static and dynamic caches
+     * in data region with enough capacity to host these caches;
+     * in other words, no thresholds for metapages ratio are broken.
+     *
+     * @throws Exception If failed.
+     */
+    @Test
+    public void testNoWarningIfCacheConfigurationDoesntBreakThreshold() throws Exception {
+        DataRegionConfiguration defaultRegionCfg = new DataRegionConfiguration();
+        defaultRegionCfg.setInitialSize(DFLT_MEM_PLC_SIZE);
+        defaultRegionCfg.setMaxSize(DFLT_MEM_PLC_SIZE);
+        defaultRegionCfg.setPersistenceEnabled(true);
+
+        memCfg = new DataStorageConfiguration();
+        memCfg.setDefaultDataRegionConfiguration(defaultRegionCfg);
+        //one hour to guarantee that checkpoint will be triggered by 'dirty pages amount' trigger
+        memCfg.setCheckpointFrequency(60 * 60 * 1000);
+
+        CacheConfiguration<Object, Object> fewPartitionsCache = new CacheConfiguration<>(DEFAULT_CACHE_NAME);
+
+        //16 partitions keep metapages overhead below the threshold, so no warning is expected
+        fewPartitionsCache.setAffinity(new RendezvousAffinityFunction(false, 16));
+        fewPartitionsCache.setBackups(1);
+
+        ccfg = fewPartitionsCache;
+
+        ListeningTestLogger srv0Logger = new ListeningTestLogger(false, null);
+        LogListener cacheGrpLsnr0 = LogListener.matches("Cache group 'default' brings high overhead").build();
+        LogListener dynamicGrpLsnr = LogListener.matches("Cache group 'dynamicCache' brings high overhead").build();
+        srv0Logger.registerListener(cacheGrpLsnr0);
+        srv0Logger.registerListener(dynamicGrpLsnr);
+        logger = srv0Logger;
+
+        IgniteEx ignite0 = startGrid("srv0");
+
+        ignite0.cluster().active(true);
+
+        assertFalse(cacheGrpLsnr0.check());
+
+        ignite0.createCache(new CacheConfiguration<>("dynamicCache")
+            .setAffinity(new RendezvousAffinityFunction(false, 16))
+        );
+
+        assertFalse(dynamicGrpLsnr.check());
+    }
+
+    /**
      * Verifies that {@link IgniteCheckedException} is thrown when page eviction threshold is less than 0.5.
      */
     @Test
diff --git a/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/CacheGroupMetricsTest.java b/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/CacheGroupMetricsTest.java
index b0b5da4..eac1ab1 100644
--- a/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/CacheGroupMetricsTest.java
+++ b/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/CacheGroupMetricsTest.java
@@ -47,6 +47,7 @@ import org.apache.ignite.events.EventType;
 import org.apache.ignite.internal.IgniteEx;
 import org.apache.ignite.internal.processors.metric.GridMetricManager;
 import org.apache.ignite.internal.processors.metric.MetricRegistry;
+import org.apache.ignite.internal.processors.metric.impl.AtomicLongMetric;
 import org.apache.ignite.internal.util.lang.GridAbsPredicate;
 import org.apache.ignite.internal.util.typedef.F;
 import org.apache.ignite.internal.util.typedef.T2;
@@ -415,4 +416,34 @@ public class CacheGroupMetricsTest extends GridCommonAbstractTest implements Ser
             mxBean0Grp2.get2().<LongMetric>findMetric("TotalAllocatedPages").value() +
             mxBean0Grp3.get2().<LongMetric>findMetric("TotalAllocatedPages").value());
     }
+
+    /**
+     * Verifies the {@code InitializedLocalPartitionsNumber} metric: 0 after activation, 10 after ten puts.
+     * It is incremented when a partition is actually created on the node and decremented when it is destroyed.
+     *
+     * @throws Exception If failed.
+     */
+    @Test
+    public void testInitializedLocalPartitions() throws Exception {
+        pds = true;
+
+        cleanPersistenceDir();
+
+        IgniteEx ignite = startGrid(0);
+
+        ignite.cluster().active(true);
+
+        MetricRegistry group1Metrics = cacheGroupMetrics(0, "group1").get2();
+
+        AtomicLongMetric locPartsNum = group1Metrics.findMetric("InitializedLocalPartitionsNumber");
+
+        assertEquals(0, locPartsNum.value()); // Nothing has been put yet, so the metric must still be zero.
+
+        IgniteCache<Integer, byte[]> cache = ignite.cache("cache1");
+
+        for (int i = 0; i < 10; i++)
+            cache.put(i, new byte[100]);
+
+        assertEquals(10, locPartsNum.value()); // Ten puts of distinct keys are expected to yield ten partitions.
+    }
 }
diff --git a/modules/core/src/test/java/org/apache/ignite/testframework/ListeningTestLogger.java b/modules/core/src/test/java/org/apache/ignite/testframework/ListeningTestLogger.java
index 1b05f4c..54f3c4b 100644
--- a/modules/core/src/test/java/org/apache/ignite/testframework/ListeningTestLogger.java
+++ b/modules/core/src/test/java/org/apache/ignite/testframework/ListeningTestLogger.java
@@ -80,6 +80,19 @@ public class ListeningTestLogger implements IgniteLogger {
     }
 
     /**
+     * Registers every given message listener, resetting each one right before it is added.
+     *
+     * @param lsnrs Message listeners to reset and register, processed in the order they are passed.
+     */
+    public void registerAllListeners(@NotNull LogListener... lsnrs) {
+        for (int i = 0; i < lsnrs.length; i++) {
+            LogListener lsnr = lsnrs[i];
+            lsnr.reset();
+            this.lsnrs.add(lsnr);
+        }
+    }
+
+    /**
      * Registers message listener.
      * <p>
      * NOTE listener is executed in the thread causing the logging, so it is not recommended to throw any exceptions