You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@cassandra.apache.org by bl...@apache.org on 2022/10/21 10:11:23 UTC

[cassandra] 01/05: Further improves precision of memtable heap tracking

This is an automated email from the ASF dual-hosted git repository.

blambov pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/cassandra.git

commit 49e0c61107005b1a83799f7f1e6c0a855d159c29
Author: Branimir Lambov <br...@datastax.com>
AuthorDate: Tue Nov 9 13:59:48 2021 +0200

    Further improves precision of memtable heap tracking
    
    Partition key ByteBuffer and columns btree were not taken
    into account and some ByteBuffers were not measured correctly.
    
    Also fixes flakes in MemtableSizeTest caused by including
    allocator pool in measurements and updates it to test all
    memtable allocation types.
    
    patch by Branimir Lambov; reviewed by Ekaterina Dimitrova, Andres de la Peña and Caleb Rackliffe for CASSANDRA-17240
---
 .../org/apache/cassandra/db/ArrayClustering.java   |   5 +
 .../org/apache/cassandra/db/BufferClustering.java  |   4 +
 src/java/org/apache/cassandra/db/Columns.java      |   2 +-
 .../db/memtable/AbstractAllocatorMemtable.java     |   8 +-
 .../cassandra/db/memtable/AbstractMemtable.java    |   2 +
 .../cassandra/db/memtable/SkipListMemtable.java    |  14 +-
 .../db/partitions/AtomicBTreePartition.java        |   6 +-
 .../org/apache/cassandra/utils/ObjectSizes.java    |  13 ++
 .../org/apache/cassandra/utils/btree/BTree.java    |   3 +
 .../apache/cassandra/cql3/MemtableSizeTest.java    | 157 ----------------
 .../cassandra/db/ClusteringHeapSizeTest.java       |  16 ++
 .../{cql3 => db/memtable}/MemtableQuickTest.java   |   4 +-
 .../db/memtable/MemtableSizeHeapBuffersTest.java   |  48 +++++
 .../memtable/MemtableSizeOffheapBuffersTest.java   |  49 +++++
 .../memtable/MemtableSizeOffheapObjectsTest.java   |  47 +++++
 .../db/memtable/MemtableSizeTestBase.java          | 199 +++++++++++++++++++++
 .../db/memtable/MemtableSizeUnslabbedTest.java     |  47 +++++
 17 files changed, 459 insertions(+), 165 deletions(-)

diff --git a/src/java/org/apache/cassandra/db/ArrayClustering.java b/src/java/org/apache/cassandra/db/ArrayClustering.java
index 53d45e7474..b04910c434 100644
--- a/src/java/org/apache/cassandra/db/ArrayClustering.java
+++ b/src/java/org/apache/cassandra/db/ArrayClustering.java
@@ -18,6 +18,7 @@
 
 package org.apache.cassandra.db;
 
+import org.apache.cassandra.db.marshal.ByteArrayAccessor;
 import org.apache.cassandra.utils.ObjectSizes;
 
 public class ArrayClustering extends AbstractArrayClusteringPrefix implements Clustering<byte[]>
@@ -31,6 +32,8 @@ public class ArrayClustering extends AbstractArrayClusteringPrefix implements Cl
 
     public long unsharedHeapSize()
     {
+        if (this == ByteArrayAccessor.factory.clustering() || this == ByteArrayAccessor.factory.staticClustering())
+            return 0;
         long arrayRefSize = ObjectSizes.sizeOfArray(values);
         long elementsSize = 0;
         for (int i = 0; i < values.length; i++)
@@ -40,6 +43,8 @@ public class ArrayClustering extends AbstractArrayClusteringPrefix implements Cl
 
     public long unsharedHeapSizeExcludingData()
     {
+        if (this == ByteArrayAccessor.factory.clustering() || this == ByteArrayAccessor.factory.staticClustering())
+            return 0;
         return EMPTY_SIZE + ObjectSizes.sizeOfArray(values);
     }
 
diff --git a/src/java/org/apache/cassandra/db/BufferClustering.java b/src/java/org/apache/cassandra/db/BufferClustering.java
index e3592e1d6d..ed6d61c82c 100644
--- a/src/java/org/apache/cassandra/db/BufferClustering.java
+++ b/src/java/org/apache/cassandra/db/BufferClustering.java
@@ -43,11 +43,15 @@ public class BufferClustering extends AbstractBufferClusteringPrefix implements
 
     public long unsharedHeapSize()
     {
+        if (this == Clustering.EMPTY || this == Clustering.STATIC_CLUSTERING)
+            return 0;
         return EMPTY_SIZE + ObjectSizes.sizeOnHeapOf(values);
     }
 
     public long unsharedHeapSizeExcludingData()
     {
+        if (this == Clustering.EMPTY || this == Clustering.STATIC_CLUSTERING)
+            return 0;
         return EMPTY_SIZE + ObjectSizes.sizeOnHeapExcludingData(values);
     }
 
diff --git a/src/java/org/apache/cassandra/db/Columns.java b/src/java/org/apache/cassandra/db/Columns.java
index 88185abf7b..8adce341c9 100644
--- a/src/java/org/apache/cassandra/db/Columns.java
+++ b/src/java/org/apache/cassandra/db/Columns.java
@@ -436,7 +436,7 @@ public class Columns extends AbstractCollection<ColumnMetadata> implements Colle
         if(this == NONE)
             return 0;
 
-        return EMPTY_SIZE;
+        return EMPTY_SIZE + BTree.sizeOfStructureOnHeap(columns);
     }
 
     @Override
diff --git a/src/java/org/apache/cassandra/db/memtable/AbstractAllocatorMemtable.java b/src/java/org/apache/cassandra/db/memtable/AbstractAllocatorMemtable.java
index 227cc6dcc2..66556a16e4 100644
--- a/src/java/org/apache/cassandra/db/memtable/AbstractAllocatorMemtable.java
+++ b/src/java/org/apache/cassandra/db/memtable/AbstractAllocatorMemtable.java
@@ -21,6 +21,7 @@ package org.apache.cassandra.db.memtable;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicReference;
 
+import com.google.common.annotations.VisibleForTesting;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -44,6 +45,7 @@ import org.apache.cassandra.utils.memory.MemtableCleaner;
 import org.apache.cassandra.utils.memory.MemtablePool;
 import org.apache.cassandra.utils.memory.NativePool;
 import org.apache.cassandra.utils.memory.SlabPool;
+import org.github.jamm.Unmetered;
 
 /**
  * A memtable that uses memory tracked and maybe allocated via a MemtableAllocator from a MemtablePool.
@@ -55,17 +57,21 @@ public abstract class AbstractAllocatorMemtable extends AbstractMemtableWithComm
 
     public static final MemtablePool MEMORY_POOL = AbstractAllocatorMemtable.createMemtableAllocatorPool();
 
+    @Unmetered
     protected final Owner owner;
+    @Unmetered  // total pool size should not be included in memtable's deep size
     protected final MemtableAllocator allocator;
 
     // Record the comparator of the CFS at the creation of the memtable. This
     // is only used when a user update the CF comparator, to know if the
     // memtable was created with the new or old comparator.
+    @Unmetered
     protected final ClusteringComparator initialComparator;
 
     private final long creationNano = Clock.Global.nanoTime();
 
-    private static MemtablePool createMemtableAllocatorPool()
+    @VisibleForTesting
+    static MemtablePool createMemtableAllocatorPool()
     {
         long heapLimit = DatabaseDescriptor.getMemtableHeapSpaceInMiB() << 20;
         long offHeapLimit = DatabaseDescriptor.getMemtableOffheapSpaceInMiB() << 20;
diff --git a/src/java/org/apache/cassandra/db/memtable/AbstractMemtable.java b/src/java/org/apache/cassandra/db/memtable/AbstractMemtable.java
index 0ac7482e4a..38249a20d6 100644
--- a/src/java/org/apache/cassandra/db/memtable/AbstractMemtable.java
+++ b/src/java/org/apache/cassandra/db/memtable/AbstractMemtable.java
@@ -33,6 +33,7 @@ import org.apache.cassandra.db.rows.EncodingStats;
 import org.apache.cassandra.schema.ColumnMetadata;
 import org.apache.cassandra.schema.TableMetadata;
 import org.apache.cassandra.schema.TableMetadataRef;
+import org.github.jamm.Unmetered;
 
 public abstract class AbstractMemtable implements Memtable
 {
@@ -46,6 +47,7 @@ public abstract class AbstractMemtable implements Memtable
     // Note: statsCollector has corresponding statistics to the two above, but starts with an epoch value which is not
     // correct for their usage.
 
+    @Unmetered
     protected TableMetadataRef metadata;
 
     public AbstractMemtable(TableMetadataRef metadataRef)
diff --git a/src/java/org/apache/cassandra/db/memtable/SkipListMemtable.java b/src/java/org/apache/cassandra/db/memtable/SkipListMemtable.java
index 17c0ccecf1..99cb237250 100644
--- a/src/java/org/apache/cassandra/db/memtable/SkipListMemtable.java
+++ b/src/java/org/apache/cassandra/db/memtable/SkipListMemtable.java
@@ -17,6 +17,7 @@
  */
 package org.apache.cassandra.db.memtable;
 
+import java.nio.ByteBuffer;
 import java.util.Iterator;
 import java.util.Map;
 import java.util.concurrent.ConcurrentNavigableMap;
@@ -52,11 +53,11 @@ import org.apache.cassandra.index.transactions.UpdateTransaction;
 import org.apache.cassandra.io.sstable.format.SSTableReadsListener;
 import org.apache.cassandra.schema.TableMetadata;
 import org.apache.cassandra.schema.TableMetadataRef;
-import org.apache.cassandra.utils.ByteBufferUtil;
 import org.apache.cassandra.utils.ObjectSizes;
 import org.apache.cassandra.utils.concurrent.OpOrder;
 import org.apache.cassandra.utils.memory.Cloner;
 import org.apache.cassandra.utils.memory.MemtableAllocator;
+import org.apache.cassandra.utils.memory.NativeAllocator;
 
 import static org.apache.cassandra.config.CassandraRelevantProperties.MEMTABLE_OVERHEAD_COMPUTE_STEPS;
 import static org.apache.cassandra.config.CassandraRelevantProperties.MEMTABLE_OVERHEAD_SIZE;
@@ -227,13 +228,18 @@ public class SkipListMemtable extends AbstractAllocatorMemtable
             Cloner cloner = allocator.cloner(group);
             ConcurrentNavigableMap<PartitionPosition, Object> partitions = new ConcurrentSkipListMap<>();
             final Object val = new Object();
+            final int testBufferSize = 8;
             for (int i = 0 ; i < count ; i++)
-                partitions.put(cloner.clone(new BufferDecoratedKey(new LongToken(i), ByteBufferUtil.EMPTY_BYTE_BUFFER)), val);
-            double avgSize = ObjectSizes.measureDeep(partitions) / (double) count;
+                partitions.put(cloner.clone(new BufferDecoratedKey(new LongToken(i), ByteBuffer.allocate(testBufferSize))), val);
+            double avgSize = ObjectSizes.measureDeepOmitShared(partitions) / (double) count;
             rowOverhead = (int) ((avgSize - Math.floor(avgSize)) < 0.05 ? Math.floor(avgSize) : Math.ceil(avgSize));
-            rowOverhead -= ObjectSizes.measureDeep(new LongToken(0));
+            rowOverhead -= new LongToken(0).getHeapSize();
             rowOverhead += AtomicBTreePartition.EMPTY_SIZE;
             rowOverhead += AbstractBTreePartition.HOLDER_UNSHARED_HEAP_SIZE;
+            if (!(allocator instanceof NativeAllocator))
+                rowOverhead -= testBufferSize;  // measureDeepOmitShared includes the given number of bytes even for
+                                                // off-heap buffers, but not for direct memory.
+            // Decorated key overhead with byte buffer (if needed) is included
             allocator.setDiscarding();
             allocator.setDiscarded();
             return rowOverhead;
diff --git a/src/java/org/apache/cassandra/db/partitions/AtomicBTreePartition.java b/src/java/org/apache/cassandra/db/partitions/AtomicBTreePartition.java
index 1fc6b91877..f3dbfc7daf 100644
--- a/src/java/org/apache/cassandra/db/partitions/AtomicBTreePartition.java
+++ b/src/java/org/apache/cassandra/db/partitions/AtomicBTreePartition.java
@@ -39,6 +39,7 @@ import org.apache.cassandra.utils.concurrent.OpOrder;
 import org.apache.cassandra.utils.memory.Cloner;
 import org.apache.cassandra.utils.memory.HeapCloner;
 import org.apache.cassandra.utils.memory.MemtableAllocator;
+import org.github.jamm.Unmetered;
 import com.google.common.annotations.VisibleForTesting;
 
 import static org.apache.cassandra.utils.Clock.Global.nanoTime;
@@ -83,9 +84,12 @@ public final class AtomicBTreePartition extends AbstractBTreePartition
      */
     private volatile int wasteTracker = TRACKER_NEVER_WASTED;
 
+    @Unmetered
     private final MemtableAllocator allocator;
+
     private volatile Holder ref;
 
+    @Unmetered
     private final TableMetadataRef metadata;
 
     public AtomicBTreePartition(TableMetadataRef metadata, DecoratedKey partitionKey, MemtableAllocator allocator)
@@ -372,7 +376,7 @@ public final class AtomicBTreePartition extends AbstractBTreePartition
         @Override
         public Row insert(Row insert)
         {
-            Row data = insert.clone(cloner); 
+            Row data = insert.clone(cloner);
             indexer.onInserted(insert);
 
             this.dataSize += data.dataSize();
diff --git a/src/java/org/apache/cassandra/utils/ObjectSizes.java b/src/java/org/apache/cassandra/utils/ObjectSizes.java
index 07066cf858..9e1b407f1d 100644
--- a/src/java/org/apache/cassandra/utils/ObjectSizes.java
+++ b/src/java/org/apache/cassandra/utils/ObjectSizes.java
@@ -31,6 +31,7 @@ public class ObjectSizes
 {
     private static final MemoryMeter meter = new MemoryMeter().withGuessing(MemoryMeter.Guess.FALLBACK_UNSAFE)
                                                               .ignoreKnownSingletons();
+    private static final MemoryMeter omitSharedMeter = meter.omitSharedBufferOverhead();
 
     private static final long EMPTY_HEAP_BUFFER_SIZE = measure(ByteBufferUtil.EMPTY_BYTE_BUFFER);
     private static final long EMPTY_BYTE_ARRAY_SIZE = measure(new byte[0]);
@@ -215,6 +216,18 @@ public class ObjectSizes
         return meter.measureDeep(pojo);
     }
 
+    /**
+     * @param pojo the object to measure
+     * @return The size on the heap of the instance and all retained heap referenced by it, excluding portions of
+     * ByteBuffer that are not directly referenced by it but including any other referenced objects that may also be retained
+     * by other objects. This also includes bytes referenced in direct byte buffers, and may double-count memory if
+     * it is referenced by multiple ByteBuffer copies.
+     */
+    public static long measureDeepOmitShared(Object pojo)
+    {
+        return omitSharedMeter.measureDeep(pojo);
+    }
+
     /**
      * @param pojo the object to measure
      * @return the size on the heap of the instance only, excluding any referenced objects
diff --git a/src/java/org/apache/cassandra/utils/btree/BTree.java b/src/java/org/apache/cassandra/utils/btree/BTree.java
index caf0699e75..761629dc87 100644
--- a/src/java/org/apache/cassandra/utils/btree/BTree.java
+++ b/src/java/org/apache/cassandra/utils/btree/BTree.java
@@ -947,6 +947,9 @@ public class BTree
 
     public static long sizeOfStructureOnHeap(Object[] tree)
     {
+        if (tree == EMPTY_LEAF)
+            return 0;
+
         long size = ObjectSizes.sizeOfArray(tree);
         if (isLeaf(tree))
             return size;
diff --git a/test/unit/org/apache/cassandra/cql3/MemtableSizeTest.java b/test/unit/org/apache/cassandra/cql3/MemtableSizeTest.java
deleted file mode 100644
index 89c64723cf..0000000000
--- a/test/unit/org/apache/cassandra/cql3/MemtableSizeTest.java
+++ /dev/null
@@ -1,157 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.cassandra.cql3;
-
-import java.util.List;
-
-import com.google.common.base.Throwables;
-import com.google.common.collect.ImmutableList;
-import org.junit.Assert;
-import org.junit.BeforeClass;
-import org.junit.Test;
-import org.junit.runner.RunWith;
-import org.junit.runners.Parameterized;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import org.apache.cassandra.Util;
-import org.apache.cassandra.config.DatabaseDescriptor;
-import org.apache.cassandra.db.ColumnFamilyStore;
-import org.apache.cassandra.db.Keyspace;
-import org.apache.cassandra.db.memtable.Memtable;
-import org.apache.cassandra.utils.FBUtilities;
-import org.apache.cassandra.utils.ObjectSizes;
-
-@RunWith(Parameterized.class)
-public class MemtableSizeTest extends CQLTester
-{
-    static final Logger logger = LoggerFactory.getLogger(MemtableSizeTest.class);
-
-    static final int partitions = 50_000;
-    static final int rowsPerPartition = 4;
-
-    static final int deletedPartitions = 10_000;
-    static final int deletedRows = 5_000;
-
-    @Parameterized.Parameter(0)
-    public String memtableClass;
-
-    @Parameterized.Parameter(1)
-    public int differencePerPartition;
-
-    @Parameterized.Parameters(name = "{0}")
-    public static List<Object[]> parameters()
-    {
-        return ImmutableList.of(new Object[]{"skiplist", 50},
-                                new Object[]{"skiplist_sharded", 60});
-    }
-
-    // must be within 50 bytes per partition of the actual size
-    final long MAX_DIFFERENCE_PARTITIONS = (partitions + deletedPartitions + deletedRows);
-
-    @BeforeClass
-    public static void setUp()
-    {
-        CQLTester.setUpClass();
-        CQLTester.prepareServer();
-        CQLTester.disablePreparedReuseForTest();
-        logger.info("setupClass done.");
-    }
-
-    @Test
-    public void testSize()
-    {
-        Util.flakyTest(this::testSizeFlaky, 2, "Fails occasionally, see CASSANDRA-16684");
-    }
-
-    private void testSizeFlaky()
-    {
-        try
-        {
-            String keyspace = createKeyspace("CREATE KEYSPACE %s with replication = { 'class' : 'SimpleStrategy', 'replication_factor' : 1 } and durable_writes = false");
-            String table = createTable(keyspace, "CREATE TABLE %s ( userid bigint, picid bigint, commentid bigint, PRIMARY KEY(userid, picid))" +
-                                                 " with compression = {'enabled': false}" +
-                                                 " and memtable = '" + memtableClass + "'");
-            execute("use " + keyspace + ';');
-
-            String writeStatement = "INSERT INTO " + table + "(userid,picid,commentid)VALUES(?,?,?)";
-
-            ColumnFamilyStore cfs = Keyspace.open(keyspace).getColumnFamilyStore(table);
-            cfs.disableAutoCompaction();
-            Util.flush(cfs);
-
-            long deepSizeBefore = ObjectSizes.measureDeep(cfs.getTracker().getView().getCurrentMemtable());
-            logger.info("Memtable deep size before {}\n",
-                        FBUtilities.prettyPrintMemory(deepSizeBefore));
-            long i;
-            long limit = partitions;
-            logger.info("Writing {} partitions of {} rows", partitions, rowsPerPartition);
-            for (i = 0; i < limit; ++i)
-            {
-                for (long j = 0; j < rowsPerPartition; ++j)
-                    execute(writeStatement, i, j, i + j);
-            }
-
-            logger.info("Deleting {} partitions", deletedPartitions);
-            limit += deletedPartitions;
-            for (; i < limit; ++i)
-            {
-                // no partition exists, but we will create a tombstone
-                execute("DELETE FROM " + table + " WHERE userid = ?", i);
-            }
-
-            logger.info("Deleting {} rows", deletedRows);
-            limit += deletedRows;
-            for (; i < limit; ++i)
-            {
-                // no row exists, but we will create a tombstone (and partition)
-                execute("DELETE FROM " + table + " WHERE userid = ? AND picid = ?", i, 0L);
-            }
-
-            if (!cfs.getLiveSSTables().isEmpty())
-                logger.info("Warning: " + cfs.getLiveSSTables().size() + " sstables created.");
-
-            Memtable memtable = cfs.getTracker().getView().getCurrentMemtable();
-            Memtable.MemoryUsage usage = Memtable.getMemoryUsage(memtable);
-            long actualHeap = usage.ownsOnHeap;
-            logger.info("Memtable in {} mode: {} ops, {} serialized bytes, {}\n",
-                        DatabaseDescriptor.getMemtableAllocationType(),
-                        memtable.operationCount(),
-                        FBUtilities.prettyPrintMemory(memtable.getLiveDataSize()),
-                        usage);
-
-            long deepSizeAfter = ObjectSizes.measureDeep(memtable);
-            logger.info("Memtable deep size {}\n",
-                        FBUtilities.prettyPrintMemory(deepSizeAfter));
-
-            long expectedHeap = deepSizeAfter - deepSizeBefore;
-            String message = String.format("Expected heap usage close to %s, got %s.\n",
-                                           FBUtilities.prettyPrintMemory(expectedHeap),
-                                           FBUtilities.prettyPrintMemory(actualHeap));
-            logger.info(message);
-            Assert.assertTrue(message, Math.abs(actualHeap - expectedHeap) <= MAX_DIFFERENCE_PARTITIONS * differencePerPartition);
-        }
-        catch (Throwable throwable)
-        {
-            Throwables.throwIfUnchecked(throwable);
-            throw new RuntimeException(throwable);
-        }
-    }
-}
\ No newline at end of file
diff --git a/test/unit/org/apache/cassandra/db/ClusteringHeapSizeTest.java b/test/unit/org/apache/cassandra/db/ClusteringHeapSizeTest.java
index e97f067aa0..3ba4ae650e 100644
--- a/test/unit/org/apache/cassandra/db/ClusteringHeapSizeTest.java
+++ b/test/unit/org/apache/cassandra/db/ClusteringHeapSizeTest.java
@@ -61,6 +61,22 @@ public class ClusteringHeapSizeTest
         Assertions.assertThat(min).isLessThanOrEqualTo(max);
     }
 
+    @Test
+    public void testSingletonClusteringHeapSize()
+    {
+        Clustering<?> clustering = this.clustering.accessor().factory().staticClustering();
+        Assertions.assertThat(clustering.unsharedHeapSize())
+                  .isEqualTo(0);
+        Assertions.assertThat(clustering.unsharedHeapSizeExcludingData())
+                  .isEqualTo(0);
+
+        clustering = this.clustering.accessor().factory().clustering();
+        Assertions.assertThat(clustering.unsharedHeapSize())
+                  .isEqualTo(0);
+        Assertions.assertThat(clustering.unsharedHeapSizeExcludingData())
+                  .isEqualTo(0);
+    }
+
     @Parameterized.Parameters(name = "{0}")
     public static Collection<Object[]> data() {
         byte[] rawBytes = { 0, 1, 2, 3, 4, 5, 6 };
diff --git a/test/unit/org/apache/cassandra/cql3/MemtableQuickTest.java b/test/unit/org/apache/cassandra/db/memtable/MemtableQuickTest.java
similarity index 97%
rename from test/unit/org/apache/cassandra/cql3/MemtableQuickTest.java
rename to test/unit/org/apache/cassandra/db/memtable/MemtableQuickTest.java
index ee5da9253e..0d539a80cc 100644
--- a/test/unit/org/apache/cassandra/cql3/MemtableQuickTest.java
+++ b/test/unit/org/apache/cassandra/db/memtable/MemtableQuickTest.java
@@ -16,7 +16,7 @@
  * limitations under the License.
  */
 
-package org.apache.cassandra.cql3;
+package org.apache.cassandra.db.memtable;
 
 import java.util.List;
 
@@ -30,6 +30,8 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import org.apache.cassandra.Util;
+import org.apache.cassandra.cql3.CQLTester;
+import org.apache.cassandra.cql3.UntypedResultSet;
 import org.apache.cassandra.db.ColumnFamilyStore;
 import org.apache.cassandra.db.Keyspace;
 
diff --git a/test/unit/org/apache/cassandra/db/memtable/MemtableSizeHeapBuffersTest.java b/test/unit/org/apache/cassandra/db/memtable/MemtableSizeHeapBuffersTest.java
new file mode 100644
index 0000000000..b448b35ffb
--- /dev/null
+++ b/test/unit/org/apache/cassandra/db/memtable/MemtableSizeHeapBuffersTest.java
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.cassandra.db.memtable;
+
+import org.junit.Assert;
+import org.junit.BeforeClass;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+import org.apache.cassandra.config.Config;
+import org.apache.cassandra.utils.memory.MemtablePool;
+import org.apache.cassandra.utils.memory.SlabPool;
+
+@RunWith(Parameterized.class)
+public class MemtableSizeHeapBuffersTest extends MemtableSizeTestBase
+{
+    // Overrides CQLTester.setUpClass to run before it
+    @BeforeClass
+    public static void setUpClass()
+    {
+        setup(Config.MemtableAllocationType.heap_buffers);
+    }
+
+    @Override
+    void checkMemtablePool()
+    {
+        MemtablePool memoryPool = AbstractAllocatorMemtable.MEMORY_POOL;
+        logger.info("Memtable pool {} off-heap limit {}", memoryPool, memoryPool.offHeap.limit);
+        Assert.assertTrue(memoryPool instanceof SlabPool);
+        Assert.assertTrue(memoryPool.offHeap.limit == 0);
+    }
+}
\ No newline at end of file
diff --git a/test/unit/org/apache/cassandra/db/memtable/MemtableSizeOffheapBuffersTest.java b/test/unit/org/apache/cassandra/db/memtable/MemtableSizeOffheapBuffersTest.java
new file mode 100644
index 0000000000..64f77e2082
--- /dev/null
+++ b/test/unit/org/apache/cassandra/db/memtable/MemtableSizeOffheapBuffersTest.java
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.cassandra.db.memtable;
+
+import org.junit.Assert;
+import org.junit.BeforeClass;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+import org.apache.cassandra.config.Config;
+import org.apache.cassandra.utils.memory.MemtablePool;
+import org.apache.cassandra.utils.memory.SlabPool;
+
+@RunWith(Parameterized.class)
+public class MemtableSizeOffheapBuffersTest extends MemtableSizeTestBase
+{
+    // Overrides CQLTester.setUpClass to run before it
+    @BeforeClass
+    public static void setUpClass()
+    {
+        setup(Config.MemtableAllocationType.offheap_buffers);
+    }
+
+
+    @Override
+    void checkMemtablePool()
+    {
+        MemtablePool memoryPool = AbstractAllocatorMemtable.MEMORY_POOL;
+        logger.info("Memtable pool {} off-heap limit {}", memoryPool, memoryPool.offHeap.limit);
+        Assert.assertTrue(memoryPool instanceof SlabPool);
+        Assert.assertTrue(memoryPool.offHeap.limit > 0);
+    }
+}
\ No newline at end of file
diff --git a/test/unit/org/apache/cassandra/db/memtable/MemtableSizeOffheapObjectsTest.java b/test/unit/org/apache/cassandra/db/memtable/MemtableSizeOffheapObjectsTest.java
new file mode 100644
index 0000000000..b2aa12cda0
--- /dev/null
+++ b/test/unit/org/apache/cassandra/db/memtable/MemtableSizeOffheapObjectsTest.java
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.cassandra.db.memtable;
+
+import org.junit.Assert;
+import org.junit.BeforeClass;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+import org.apache.cassandra.config.Config;
+import org.apache.cassandra.utils.memory.MemtablePool;
+import org.apache.cassandra.utils.memory.NativePool;
+
+@RunWith(Parameterized.class)
+public class MemtableSizeOffheapObjectsTest extends MemtableSizeTestBase
+{
+    // Overrides CQLTester.setUpClass to run before it
+    @BeforeClass
+    public static void setUpClass()
+    {
+        setup(Config.MemtableAllocationType.offheap_objects);
+    }
+
+    @Override
+    void checkMemtablePool()
+    {
+        MemtablePool memoryPool = AbstractAllocatorMemtable.MEMORY_POOL;
+        System.out.println("Memtable pool " + memoryPool + " off-heap limit " + memoryPool.offHeap.limit);
+        Assert.assertTrue(memoryPool instanceof NativePool);
+    }
+}
\ No newline at end of file
diff --git a/test/unit/org/apache/cassandra/db/memtable/MemtableSizeTestBase.java b/test/unit/org/apache/cassandra/db/memtable/MemtableSizeTestBase.java
new file mode 100644
index 0000000000..252d7bf14e
--- /dev/null
+++ b/test/unit/org/apache/cassandra/db/memtable/MemtableSizeTestBase.java
@@ -0,0 +1,199 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.cassandra.db.memtable;
+
+import java.lang.reflect.Field;
+import java.util.List;
+
+import com.google.common.base.Throwables;
+import com.google.common.collect.ImmutableList;
+import org.junit.Assert;
+import org.junit.Test;
+import org.junit.runners.Parameterized;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.cassandra.Util;
+import org.apache.cassandra.config.Config;
+import org.apache.cassandra.config.DatabaseDescriptor;
+import org.apache.cassandra.cql3.CQLTester;
+import org.apache.cassandra.db.ColumnFamilyStore;
+import org.apache.cassandra.db.Keyspace;
+import org.apache.cassandra.utils.FBUtilities;
+import org.github.jamm.MemoryMeter;
+
+/**
+ * Base for tests that compare the on-heap usage a memtable reports for itself with the growth of its deep size as
+ * measured by jamm's {@link MemoryMeter}, for each memtable configuration listed in {@link #parameters()}.
+ * <p>
+ * Concrete tests choose the allocation type by calling {@link #setup(Config.MemtableAllocationType)} from a
+ * {@code @BeforeClass} method, and may override {@link #checkMemtablePool()} to verify that the choice took effect.
+ * <p>
+ * Note: This test can be run in IDEA with the allocation type configured in the test yaml and memtable using the
+ * value memtableClass is initialized with.
+ */
+public class MemtableSizeTestBase extends CQLTester
+{
+    // Note: To see a printout of the usage for each object, add .enableDebug() here (most useful with smaller number of
+    // partitions).
+    static MemoryMeter meter = new MemoryMeter().ignoreKnownSingletons()
+                                                .withGuessing(MemoryMeter.Guess.FALLBACK_UNSAFE);
+
+    static final Logger logger = LoggerFactory.getLogger(MemtableSizeTestBase.class);
+
+    // Number of partitions written, each with rowsPerPartition rows.
+    static final int partitions = 50_000;
+    static final int rowsPerPartition = 4;
+
+    // Number of partition and row deletions issued after the writes, each against a partition key not written before.
+    static final int deletedPartitions = 10_000;
+    static final int deletedRows = 5_000;
+
+    // Value used for the "memtable" table option; the default applies when the class is run without parameters.
+    @Parameterized.Parameter(0)
+    public String memtableClass = "skiplist";
+
+    /**
+     * @return the memtable configuration names the test is run with
+     */
+    @Parameterized.Parameters(name = "{0}")
+    public static List<Object> parameters()
+    {
+        return ImmutableList.of("skiplist",
+                                "skiplist_sharded");
+    }
+
+    // Must be within 3% of the real usage. We are actually more precise than this, but the threshold is set higher to
+    // avoid flakes. For on-heap allocators we allow for extra overheads below.
+    final int MAX_DIFFERENCE_PERCENT = 3;
+    // Slab overhead, added when the memtable uses heap_buffers.
+    final int SLAB_OVERHEAD = 1024 * 1024;
+
+    /**
+     * Sets the memtable allocation type and a memtable cleanup threshold of 0.8 directly on the {@link Config} held
+     * by {@link DatabaseDescriptor} (accessed reflectively through its {@code conf} field), then runs
+     * {@code CQLTester.setUpClass()} and {@code CQLTester.prepareServer()}.
+     *
+     * @param allocationType the memtable allocation type to run the test with
+     */
+    public static void setup(Config.MemtableAllocationType allocationType)
+    {
+        try
+        {
+            Field confField = DatabaseDescriptor.class.getDeclaredField("conf");
+            confField.setAccessible(true);
+            Config conf = (Config) confField.get(null);
+            conf.memtable_allocation_type = allocationType;
+            conf.memtable_cleanup_threshold = 0.8f; // give us more space to fit test data without flushing
+        }
+        catch (NoSuchFieldException | IllegalAccessException e)
+        {
+            throw Throwables.propagate(e);
+        }
+
+        CQLTester.setUpClass();
+        CQLTester.prepareServer();
+        logger.info("setupClass done, allocation type {}", allocationType);
+    }
+
+    /**
+     * Hook called at the start of {@link #testSize()}. Does nothing here; subclasses override it to assert that the
+     * memtable pool matches the allocation type they requested.
+     */
+    void checkMemtablePool()
+    {
+        // overridden by instances
+    }
+
+    /**
+     * Writes {@link #partitions} partitions of {@link #rowsPerPartition} rows, then issues {@link #deletedPartitions}
+     * partition deletions and {@link #deletedRows} row deletions, and checks that the on-heap usage reported for the
+     * memtable is within the allowed difference of the growth in its measured deep size.
+     * <p>
+     * If the first comparison is out of bounds, the deep size is measured once more and the result of that second
+     * measurement is the one asserted on.
+     */
+    @Test
+    public void testSize() throws Throwable
+    {
+        // Make sure memtables use the correct allocation type, i.e. that setup has worked.
+        // If this fails, make sure the test is not reusing an already-initialized JVM.
+        checkMemtablePool();
+
+        CQLTester.disablePreparedReuseForTest();
+        String keyspace = createKeyspace("CREATE KEYSPACE %s with replication = { 'class' : 'SimpleStrategy', 'replication_factor' : 1 } and durable_writes = false");
+        try
+        {
+            String table = createTable(keyspace, "CREATE TABLE %s ( userid bigint, picid bigint, commentid bigint, PRIMARY KEY(userid, picid))" +
+                                                 " with compression = {'enabled': false}" +
+                                                 " and memtable = '" + memtableClass + "'");
+            execute("use " + keyspace + ';');
+
+            String writeStatement = "INSERT INTO " + table + "(userid,picid,commentid)VALUES(?,?,?)";
+            forcePreparedValues();
+
+            ColumnFamilyStore cfs = Keyspace.open(keyspace).getColumnFamilyStore(table);
+            cfs.disableAutoCompaction();
+            Util.flush(cfs);
+
+            // Baseline: deep size of the current memtable before any test data is written to it.
+            Memtable memtable = cfs.getTracker().getView().getCurrentMemtable();
+            long deepSizeBefore = meter.measureDeep(memtable);
+            logger.info("Memtable deep size before {}", FBUtilities.prettyPrintMemory(deepSizeBefore));
+            long i;
+            long limit = partitions;
+            logger.info("Writing {} partitions of {} rows", partitions, rowsPerPartition);
+            for (i = 0; i < limit; ++i)
+            {
+                for (long j = 0; j < rowsPerPartition; ++j)
+                    execute(writeStatement, i, j, i + j);
+            }
+
+            logger.info("Deleting {} partitions", deletedPartitions);
+            limit += deletedPartitions;
+            for (; i < limit; ++i)
+            {
+                // no partition exists, but we will create a tombstone
+                execute("DELETE FROM " + table + " WHERE userid = ?", i);
+            }
+
+            logger.info("Deleting {} rows", deletedRows);
+            limit += deletedRows;
+            for (; i < limit; ++i)
+            {
+                // no row exists, but we will create a tombstone (and partition)
+                execute("DELETE FROM " + table + " WHERE userid = ? AND picid = ?", i, 0L);
+            }
+
+            Assert.assertSame("Memtable flushed during test. Test was not carried out correctly.",
+                              memtable,
+                              cfs.getTracker().getView().getCurrentMemtable());
+
+            Memtable.MemoryUsage usage = Memtable.getMemoryUsage(memtable);
+            long actualHeap = usage.ownsOnHeap;
+            logger.info(String.format("Memtable in %s mode: %d ops, %s serialized bytes, %s",
+                                      DatabaseDescriptor.getMemtableAllocationType(),
+                                      memtable.operationCount(),
+                                      FBUtilities.prettyPrintMemory(memtable.getLiveDataSize()),
+                                      usage));
+
+            long deepSizeAfter = meter.measureDeep(memtable);
+            logger.info("Memtable deep size {}", FBUtilities.prettyPrintMemory(deepSizeAfter));
+
+            long expectedHeap = deepSizeAfter - deepSizeBefore;
+            long max_difference = MAX_DIFFERENCE_PERCENT * expectedHeap / 100;
+            if (DatabaseDescriptor.getMemtableAllocationType() == Config.MemtableAllocationType.heap_buffers)
+                max_difference += SLAB_OVERHEAD;
+
+            String message = differenceMessage(expectedHeap, actualHeap);
+            logger.info(message);
+            if (Math.abs(actualHeap - expectedHeap) > max_difference)
+            {
+                // Under Java 11, it seems the meter can reach into phantom reference queues and count more space than
+                // is actually reachable. Unfortunately ignoreNonStrongReferences() does not help (worse, it throws
+                // exceptions trying to get a phantom referent). Retrying the measurement appears to clear these up.
+                Thread.sleep(50);
+                long secondPass = meter.measureDeep(memtable);
+                logger.error("Deep size first pass {} second pass {}",
+                             FBUtilities.prettyPrintMemory(deepSizeAfter),
+                             FBUtilities.prettyPrintMemory(secondPass));
+                expectedHeap = secondPass - deepSizeBefore;
+                // Rebuild the message so that a failure reports the values that are actually compared below,
+                // not the ones from the first pass.
+                message = differenceMessage(expectedHeap, actualHeap);
+                logger.info(message);
+            }
+
+            Assert.assertTrue(message, Math.abs(actualHeap - expectedHeap) <= max_difference);
+        }
+        finally
+        {
+            execute(String.format("DROP KEYSPACE IF EXISTS %s", keyspace));
+        }
+    }
+
+    /**
+     * Formats the expected and the reported heap usage, and their difference, for logging and assertion messages.
+     *
+     * @param expectedHeap heap usage derived from the measured deep sizes, in bytes
+     * @param actualHeap   on-heap usage reported for the memtable, in bytes
+     * @return the formatted description
+     */
+    private static String differenceMessage(long expectedHeap, long actualHeap)
+    {
+        return String.format("Expected heap usage close to %s, got %s, %s difference.\n",
+                             FBUtilities.prettyPrintMemory(expectedHeap),
+                             FBUtilities.prettyPrintMemory(actualHeap),
+                             FBUtilities.prettyPrintMemory(expectedHeap - actualHeap));
+    }
+}
\ No newline at end of file
diff --git a/test/unit/org/apache/cassandra/db/memtable/MemtableSizeUnslabbedTest.java b/test/unit/org/apache/cassandra/db/memtable/MemtableSizeUnslabbedTest.java
new file mode 100644
index 0000000000..1d2d460660
--- /dev/null
+++ b/test/unit/org/apache/cassandra/db/memtable/MemtableSizeUnslabbedTest.java
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.cassandra.db.memtable;
+
+import org.junit.Assert;
+import org.junit.BeforeClass;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+import org.apache.cassandra.config.Config;
+import org.apache.cassandra.utils.memory.HeapPool;
+import org.apache.cassandra.utils.memory.MemtablePool;
+
+/**
+ * Runs the memtable size test defined in {@link MemtableSizeTestBase} with the {@code unslabbed_heap_buffers}
+ * memtable allocation type.
+ */
+@RunWith(Parameterized.class)
+public class MemtableSizeUnslabbedTest extends MemtableSizeTestBase
+{
+    /**
+     * Selects the allocation type and initializes the server. This takes the place of {@code CQLTester.setUpClass}
+     * so that the configuration is changed first; {@code setup} calls {@code CQLTester.setUpClass} itself afterwards.
+     */
+    @BeforeClass
+    public static void setUpClass()
+    {
+        setup(Config.MemtableAllocationType.unslabbed_heap_buffers);
+    }
+
+    /**
+     * Checks that the memtable pool in use is a {@link HeapPool}, i.e. that the allocation type requested in
+     * {@link #setUpClass()} has been applied.
+     */
+    @Override
+    void checkMemtablePool()
+    {
+        final MemtablePool pool = AbstractAllocatorMemtable.MEMORY_POOL;
+        logger.info("Memtable pool {} off-heap limit {}", pool, pool.offHeap.limit);
+
+        final boolean isHeapPool = pool instanceof HeapPool;
+        Assert.assertTrue(isHeapPool);
+    }
+}
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@cassandra.apache.org
For additional commands, e-mail: commits-help@cassandra.apache.org